diff --git a/openai_whisper-large-v3/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-large-v3/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e7a8078f8bd3117b30586f75e0ed30bcb37598ab
--- /dev/null
+++ b/openai_whisper-large-v3/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb07bab32dcd62ce653b5b288bd6c27bdc5a538be309f242e33ed05e1cb53457
+size 1273974400
diff --git a/openai_whisper-large-v3_turbo/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-large-v3_turbo/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..30b6ee15252d8f5a7300cc478ebc3ae34818ce35
--- /dev/null
+++ b/openai_whisper-large-v3_turbo/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f46870171555792f9e98d5266d2c7d885a18962093b3a9544fffa54dbe8df16
+size 1273974400
diff --git a/openai_whisper-large-v3_turbo/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-large-v3_turbo/TextDecoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..36647663b426f6d8541eac884109dc0cca09a970
--- /dev/null
+++ b/openai_whisper-large-v3_turbo/TextDecoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:745c0c3896c41cd6ad01b6c3ed852e0bb1cb2fd1ef579017c5cc9a8aff1d3c66
+size 1813201716
diff --git a/openai_whisper-small.en/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-small.en/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a6e6d36bf1711b026fc59a9a452f3d1bda53430d
--- /dev/null
+++ b/openai_whisper-small.en/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f92860042703b3679071e7eeb03c861e52bf0e1da38943cf7c37eb5fecfb3abe
+size 176323456
diff --git a/openai_whisper-small.en/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-small.en/TextDecoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5f828fe8f81c5ffad70690230afe11b6d582d654
--- /dev/null
+++ b/openai_whisper-small.en/TextDecoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a71a58c723a8c379fbc0ba666d6a3d3dd85d84d34ee8665697d2edab52f2f6b1
+size 307285808
diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-small/AudioEncoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..26effa4133bbbe74215e0d5088259839b88ef10e
--- /dev/null
+++ b/openai_whisper-small/AudioEncoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d68f152b6573ac55203a3dc8383730e6ecde685c7d2a88815b89820c88e35371
+size 347
diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-small/AudioEncoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f06d3ae86653eca79c3b7b8fbde3379d72185ee
--- /dev/null
+++ b/openai_whisper-small/AudioEncoder.mlmodelc/metadata.json
@@ -0,0 +1,69 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 768 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 768, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 156,
+      "Ios16.rsqrt" : 25,
+      "Ios16.mul" : 626,
+      "SliceByIndex" : 1008,
+      "Ios16.sub" : 25,
+      "Transpose" : 12,
+      "Ios16.einsum" : 1152,
+      "Ios16.conv" : 74,
+      "Ios16.add" : 50,
+      "Ios16.reduceMean" : 50,
+      "Ios16.softmax" : 576,
+      "Ios16.gelu" : 14,
+      "Ios16.batchNorm" : 25
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.1",
+      "com.github.apple.coremltools.version" : "7.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "AudioEncoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/model.mil b/openai_whisper-small/AudioEncoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..0dd59f0859f9a655599067124f0583a5263a0be5
--- /dev/null
+++ b/openai_whisper-small/AudioEncoder.mlmodelc/model.mil
@@ -0,0 +1,9382 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})]
+{
+    func main<ios16>(tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features) {
+            tensor<int32, [2]> var_50 = const()[name = tensor<string, []>("op_50"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_56 = const()[name = tensor<string, []>("op_56"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_61 = const()[name = tensor<string, []>("op_61"), val = tensor<int32, []>(1)];
+            tensor<string, []> var_66_pad_type_0 = const()[name = tensor<string, []>("op_66_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_66_pad_0 = const()[name = tensor<string, []>("op_66_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<fp16, [768, 80, 1, 3]> var_41_to_fp16 = const()[name = tensor<string, []>("op_41_to_fp16"), val = tensor<fp16, [768, 80, 1, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [768]> var_47_to_fp16 = const()[name = tensor<string, []>("op_47_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368768)))];
+            tensor<fp16, [1, 768, 1, 3000]> var_66_cast_fp16 = conv(bias = var_47_to_fp16, dilations = var_56, groups = var_61, pad = var_66_pad_0, pad_type = var_66_pad_type_0, strides = var_50, weight = var_41_to_fp16, x = melspectrogram_features)[name = tensor<string, []>("op_66_cast_fp16")];
+            tensor<string, []> hidden_states_1_mode_0 = const()[name = tensor<string, []>("hidden_states_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 768, 1, 3000]> hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_66_cast_fp16)[name = tensor<string, []>("hidden_states_1_cast_fp16")];
+            tensor<int32, [2]> var_90 = const()[name = tensor<string, []>("op_90"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_96 = const()[name = tensor<string, []>("op_96"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_101 = const()[name = tensor<string, []>("op_101"), val = tensor<int32, []>(1)];
+            tensor<string, []> var_106_pad_type_0 = const()[name = tensor<string, []>("op_106_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_106_pad_0 = const()[name = tensor<string, []>("op_106_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<fp16, [768, 768, 1, 3]> var_81_to_fp16 = const()[name = tensor<string, []>("op_81_to_fp16"), val = tensor<fp16, [768, 768, 1, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370368)))];
+            tensor<fp16, [768]> var_87_to_fp16 = const()[name = tensor<string, []>("op_87_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3909376)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_106_cast_fp16 = conv(bias = var_87_to_fp16, dilations = var_96, groups = var_101, pad = var_106_pad_0, pad_type = var_106_pad_type_0, strides = var_90, weight = var_81_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor<string, []>("op_106_cast_fp16")];
+            tensor<string, []> hidden_states_3_mode_0 = const()[name = tensor<string, []>("hidden_states_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_106_cast_fp16)[name = tensor<string, []>("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_124_to_fp16 = const()[name = tensor<string, []>("op_124_to_fp16"), val = tensor<fp16, [1, 768, 1, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3910976)))];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_124_to_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_134 = const()[name = tensor<string, []>("op_134"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_151 = const()[name = tensor<string, []>("op_151"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_152 = const()[name = tensor<string, []>("op_152"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_162 = const()[name = tensor<string, []>("op_162"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_1_cast_fp16 = reduce_mean(axes = var_162, keep_dims = var_152, x = inputs_1_cast_fp16)[name = tensor<string, []>("channels_mean_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor<string, []>("zero_mean_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor<string, []>("zero_mean_sq_1_cast_fp16")];
+            tensor<int32, [1]> var_166 = const()[name = tensor<string, []>("op_166"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_167_cast_fp16 = reduce_mean(axes = var_166, keep_dims = var_152, x = zero_mean_sq_1_cast_fp16)[name = tensor<string, []>("op_167_cast_fp16")];
+            tensor<fp16, []> var_168_to_fp16 = const()[name = tensor<string, []>("op_168_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_169_cast_fp16 = add(x = var_167_cast_fp16, y = var_168_to_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
+            tensor<fp16, []> denom_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, x = var_169_cast_fp16)[name = tensor<string, []>("denom_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
+            tensor<fp16, [768]> obj_1_mean_0_to_fp16 = const()[name = tensor<string, []>("obj_1_mean_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6215040)))];
+            tensor<fp16, [768]> obj_1_variance_0_to_fp16 = const()[name = tensor<string, []>("obj_1_variance_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6216640)))];
+            tensor<fp16, [768]> obj_1_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6218240)))];
+            tensor<fp16, [768]> obj_1_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_1_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6219840)))];
+            tensor<fp16, []> obj_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor<string, []>("obj_1_cast_fp16")];
+            tensor<int32, [2]> var_184 = const()[name = tensor<string, []>("op_184"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_186 = const()[name = tensor<string, []>("op_186"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_1_pad_type_0 = const()[name = tensor<string, []>("query_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_1_pad_0 = const()[name = tensor<string, []>("query_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6221440)))];
+            tensor<fp16, [768]> layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7401152)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_186, groups = var_151, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_184, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("query_1_cast_fp16")];
+            tensor<int32, [2]> var_190 = const()[name = tensor<string, []>("op_190"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_192 = const()[name = tensor<string, []>("op_192"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_1_pad_type_0 = const()[name = tensor<string, []>("key_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_1_pad_0 = const()[name = tensor<string, []>("key_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7402752)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_1_cast_fp16 = conv(dilations = var_192, groups = var_151, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = var_190, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("key_1_cast_fp16")];
+            tensor<int32, [2]> var_197 = const()[name = tensor<string, []>("op_197"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_199 = const()[name = tensor<string, []>("op_199"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_1_pad_type_0 = const()[name = tensor<string, []>("value_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_1_pad_0 = const()[name = tensor<string, []>("value_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8582464)))];
+            tensor<fp16, [768]> layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9762176)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_199, groups = var_151, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = var_197, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("value_1_cast_fp16")];
+            tensor<int32, [4]> var_206_begin_0 = const()[name = tensor<string, []>("op_206_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_206_end_0 = const()[name = tensor<string, []>("op_206_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_206_end_mask_0 = const()[name = tensor<string, []>("op_206_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_206_cast_fp16 = slice_by_index(begin = var_206_begin_0, end = var_206_end_0, end_mask = var_206_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_206_cast_fp16")];
+            tensor<int32, [4]> var_210_begin_0 = const()[name = tensor<string, []>("op_210_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_210_end_0 = const()[name = tensor<string, []>("op_210_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_210_end_mask_0 = const()[name = tensor<string, []>("op_210_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_210_cast_fp16")];
+            tensor<int32, [4]> var_214_begin_0 = const()[name = tensor<string, []>("op_214_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_214_end_0 = const()[name = tensor<string, []>("op_214_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_214_end_mask_0 = const()[name = tensor<string, []>("op_214_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_214_cast_fp16")];
+            tensor<int32, [4]> var_218_begin_0 = const()[name = tensor<string, []>("op_218_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_218_end_0 = const()[name = tensor<string, []>("op_218_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_218_end_mask_0 = const()[name = tensor<string, []>("op_218_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_218_cast_fp16")];
+            tensor<int32, [4]> var_222_begin_0 = const()[name = tensor<string, []>("op_222_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_222_end_0 = const()[name = tensor<string, []>("op_222_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_222_end_mask_0 = const()[name = tensor<string, []>("op_222_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_222_cast_fp16")];
+            tensor<int32, [4]> var_226_begin_0 = const()[name = tensor<string, []>("op_226_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_226_end_0 = const()[name = tensor<string, []>("op_226_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_226_end_mask_0 = const()[name = tensor<string, []>("op_226_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_226_cast_fp16")];
+            tensor<int32, [4]> var_230_begin_0 = const()[name = tensor<string, []>("op_230_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_230_end_0 = const()[name = tensor<string, []>("op_230_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_230_end_mask_0 = const()[name = tensor<string, []>("op_230_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_230_cast_fp16")];
+            tensor<int32, [4]> var_234_begin_0 = const()[name = tensor<string, []>("op_234_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_234_end_0 = const()[name = tensor<string, []>("op_234_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_234_end_mask_0 = const()[name = tensor<string, []>("op_234_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_234_cast_fp16")];
+            tensor<int32, [4]> var_238_begin_0 = const()[name = tensor<string, []>("op_238_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_238_end_0 = const()[name = tensor<string, []>("op_238_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_238_end_mask_0 = const()[name = tensor<string, []>("op_238_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_238_cast_fp16")];
+            tensor<int32, [4]> var_242_begin_0 = const()[name = tensor<string, []>("op_242_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_242_end_0 = const()[name = tensor<string, []>("op_242_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_242_end_mask_0 = const()[name = tensor<string, []>("op_242_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_242_cast_fp16")];
+            tensor<int32, [4]> var_246_begin_0 = const()[name = tensor<string, []>("op_246_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_246_end_0 = const()[name = tensor<string, []>("op_246_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_246_end_mask_0 = const()[name = tensor<string, []>("op_246_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_246_cast_fp16")];
+            tensor<int32, [4]> var_250_begin_0 = const()[name = tensor<string, []>("op_250_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_250_end_0 = const()[name = tensor<string, []>("op_250_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_250_end_mask_0 = const()[name = tensor<string, []>("op_250_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_250_cast_fp16")];
+            tensor<int32, [4]> var_259_begin_0 = const()[name = tensor<string, []>("op_259_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_259_end_0 = const()[name = tensor<string, []>("op_259_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_259_end_mask_0 = const()[name = tensor<string, []>("op_259_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = var_206_cast_fp16)[name = tensor<string, []>("op_259_cast_fp16")];
+            tensor<int32, [4]> var_266_begin_0 = const()[name = tensor<string, []>("op_266_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_266_end_0 = const()[name = tensor<string, []>("op_266_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_266_end_mask_0 = const()[name = tensor<string, []>("op_266_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = var_206_cast_fp16)[name = tensor<string, []>("op_266_cast_fp16")];
+            tensor<int32, [4]> var_273_begin_0 = const()[name = tensor<string, []>("op_273_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_273_end_0 = const()[name = tensor<string, []>("op_273_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_273_end_mask_0 = const()[name = tensor<string, []>("op_273_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_273_cast_fp16 = slice_by_index(begin = var_273_begin_0, end = var_273_end_0, end_mask = var_273_end_mask_0, x = var_206_cast_fp16)[name = tensor<string, []>("op_273_cast_fp16")];
+            tensor<int32, [4]> var_280_begin_0 = const()[name = tensor<string, []>("op_280_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_280_end_0 = const()[name = tensor<string, []>("op_280_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_280_end_mask_0 = const()[name = tensor<string, []>("op_280_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_280_cast_fp16 = slice_by_index(begin = var_280_begin_0, end = var_280_end_0, end_mask = var_280_end_mask_0, x = var_206_cast_fp16)[name = tensor<string, []>("op_280_cast_fp16")];
+            tensor<int32, [4]> var_287_begin_0 = const()[name = tensor<string, []>("op_287_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_287_end_0 = const()[name = tensor<string, []>("op_287_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_287_end_mask_0 = const()[name = tensor<string, []>("op_287_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_287_cast_fp16 = slice_by_index(begin = var_287_begin_0, end = var_287_end_0, end_mask = var_287_end_mask_0, x = var_210_cast_fp16)[name = tensor<string, []>("op_287_cast_fp16")];
+            tensor<int32, [4]> var_294_begin_0 = const()[name = tensor<string, []>("op_294_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_294_end_0 = const()[name = tensor<string, []>("op_294_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_294_end_mask_0 = const()[name = tensor<string, []>("op_294_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = var_210_cast_fp16)[name = tensor<string, []>("op_294_cast_fp16")];
+            tensor<int32, [4]> var_301_begin_0 = const()[name = tensor<string, []>("op_301_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_301_end_0 = const()[name = tensor<string, []>("op_301_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_301_end_mask_0 = const()[name = tensor<string, []>("op_301_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = var_210_cast_fp16)[name = tensor<string, []>("op_301_cast_fp16")];
+            tensor<int32, [4]> var_308_begin_0 = const()[name = tensor<string, []>("op_308_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_308_end_0 = const()[name = tensor<string, []>("op_308_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_308_end_mask_0 = const()[name = tensor<string, []>("op_308_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = var_210_cast_fp16)[name = tensor<string, []>("op_308_cast_fp16")];
+            tensor<int32, [4]> var_315_begin_0 = const()[name = tensor<string, []>("op_315_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_315_end_0 = const()[name = tensor<string, []>("op_315_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_315_end_mask_0 = const()[name = tensor<string, []>("op_315_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_315_cast_fp16 = slice_by_index(begin = var_315_begin_0, end = var_315_end_0, end_mask = var_315_end_mask_0, x = var_214_cast_fp16)[name = tensor<string, []>("op_315_cast_fp16")];
+            tensor<int32, [4]> var_322_begin_0 = const()[name = tensor<string, []>("op_322_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_322_end_0 = const()[name = tensor<string, []>("op_322_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_322_end_mask_0 = const()[name = tensor<string, []>("op_322_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = var_214_cast_fp16)[name = tensor<string, []>("op_322_cast_fp16")];
+            tensor<int32, [4]> var_329_begin_0 = const()[name = tensor<string, []>("op_329_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_329_end_0 = const()[name = tensor<string, []>("op_329_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_329_end_mask_0 = const()[name = tensor<string, []>("op_329_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_329_cast_fp16 = slice_by_index(begin = var_329_begin_0, end = var_329_end_0, end_mask = var_329_end_mask_0, x = var_214_cast_fp16)[name = tensor<string, []>("op_329_cast_fp16")];
+            tensor<int32, [4]> var_336_begin_0 = const()[name = tensor<string, []>("op_336_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_336_end_0 = const()[name = tensor<string, []>("op_336_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_336_end_mask_0 = const()[name = tensor<string, []>("op_336_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = var_336_end_0, end_mask = var_336_end_mask_0, x = var_214_cast_fp16)[name = tensor<string, []>("op_336_cast_fp16")];
+            tensor<int32, [4]> var_343_begin_0 = const()[name = tensor<string, []>("op_343_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_343_end_0 = const()[name = tensor<string, []>("op_343_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_343_end_mask_0 = const()[name = tensor<string, []>("op_343_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = var_218_cast_fp16)[name = tensor<string, []>("op_343_cast_fp16")];
+            tensor<int32, [4]> var_350_begin_0 = const()[name = tensor<string, []>("op_350_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_350_end_0 = const()[name = tensor<string, []>("op_350_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_350_end_mask_0 = const()[name = tensor<string, []>("op_350_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_350_cast_fp16 = slice_by_index(begin = var_350_begin_0, end = var_350_end_0, end_mask = var_350_end_mask_0, x = var_218_cast_fp16)[name = tensor<string, []>("op_350_cast_fp16")];
+            tensor<int32, [4]> var_357_begin_0 = const()[name = tensor<string, []>("op_357_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_357_end_0 = const()[name = tensor<string, []>("op_357_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_357_end_mask_0 = const()[name = tensor<string, []>("op_357_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_357_cast_fp16 = slice_by_index(begin = var_357_begin_0, end = var_357_end_0, end_mask = var_357_end_mask_0, x = var_218_cast_fp16)[name = tensor<string, []>("op_357_cast_fp16")];
+            tensor<int32, [4]> var_364_begin_0 = const()[name = tensor<string, []>("op_364_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_364_end_0 = const()[name = tensor<string, []>("op_364_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_364_end_mask_0 = const()[name = tensor<string, []>("op_364_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = var_218_cast_fp16)[name = tensor<string, []>("op_364_cast_fp16")];
+            tensor<int32, [4]> var_371_begin_0 = const()[name = tensor<string, []>("op_371_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_371_end_0 = const()[name = tensor<string, []>("op_371_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_371_end_mask_0 = const()[name = tensor<string, []>("op_371_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_371_cast_fp16 = slice_by_index(begin = var_371_begin_0, end = var_371_end_0, end_mask = var_371_end_mask_0, x = var_222_cast_fp16)[name = tensor<string, []>("op_371_cast_fp16")];
+            tensor<int32, [4]> var_378_begin_0 = const()[name = tensor<string, []>("op_378_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_378_end_0 = const()[name = tensor<string, []>("op_378_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_378_end_mask_0 = const()[name = tensor<string, []>("op_378_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_378_cast_fp16 = slice_by_index(begin = var_378_begin_0, end = var_378_end_0, end_mask = var_378_end_mask_0, x = var_222_cast_fp16)[name = tensor<string, []>("op_378_cast_fp16")];
+            tensor<int32, [4]> var_385_begin_0 = const()[name = tensor<string, []>("op_385_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_385_end_0 = const()[name = tensor<string, []>("op_385_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_385_end_mask_0 = const()[name = tensor<string, []>("op_385_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_385_cast_fp16 = slice_by_index(begin = var_385_begin_0, end = var_385_end_0, end_mask = var_385_end_mask_0, x = var_222_cast_fp16)[name = tensor<string, []>("op_385_cast_fp16")];
+            tensor<int32, [4]> var_392_begin_0 = const()[name = tensor<string, []>("op_392_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_392_end_0 = const()[name = tensor<string, []>("op_392_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_392_end_mask_0 = const()[name = tensor<string, []>("op_392_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = var_222_cast_fp16)[name = tensor<string, []>("op_392_cast_fp16")];
+            tensor<int32, [4]> var_399_begin_0 = const()[name = tensor<string, []>("op_399_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_399_end_0 = const()[name = tensor<string, []>("op_399_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_399_end_mask_0 = const()[name = tensor<string, []>("op_399_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = var_399_end_0, end_mask = var_399_end_mask_0, x = var_226_cast_fp16)[name = tensor<string, []>("op_399_cast_fp16")];
+            tensor<int32, [4]> var_406_begin_0 = const()[name = tensor<string, []>("op_406_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_406_end_0 = const()[name = tensor<string, []>("op_406_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_406_end_mask_0 = const()[name = tensor<string, []>("op_406_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_406_cast_fp16 = slice_by_index(begin = var_406_begin_0, end = var_406_end_0, end_mask = var_406_end_mask_0, x = var_226_cast_fp16)[name = tensor<string, []>("op_406_cast_fp16")];
+            tensor<int32, [4]> var_413_begin_0 = const()[name = tensor<string, []>("op_413_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_413_end_0 = const()[name = tensor<string, []>("op_413_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_413_end_mask_0 = const()[name = tensor<string, []>("op_413_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_413_cast_fp16 = slice_by_index(begin = var_413_begin_0, end = var_413_end_0, end_mask = var_413_end_mask_0, x = var_226_cast_fp16)[name = tensor<string, []>("op_413_cast_fp16")];
+            tensor<int32, [4]> var_420_begin_0 = const()[name = tensor<string, []>("op_420_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_420_end_0 = const()[name = tensor<string, []>("op_420_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_420_end_mask_0 = const()[name = tensor<string, []>("op_420_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = var_226_cast_fp16)[name = tensor<string, []>("op_420_cast_fp16")];
+            tensor<int32, [4]> var_427_begin_0 = const()[name = tensor<string, []>("op_427_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_427_end_0 = const()[name = tensor<string, []>("op_427_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_427_end_mask_0 = const()[name = tensor<string, []>("op_427_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_427_cast_fp16 = slice_by_index(begin = var_427_begin_0, end = var_427_end_0, end_mask = var_427_end_mask_0, x = var_230_cast_fp16)[name = tensor<string, []>("op_427_cast_fp16")];
+            tensor<int32, [4]> var_434_begin_0 = const()[name = tensor<string, []>("op_434_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_434_end_0 = const()[name = tensor<string, []>("op_434_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_434_end_mask_0 = const()[name = tensor<string, []>("op_434_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_434_cast_fp16 = slice_by_index(begin = var_434_begin_0, end = var_434_end_0, end_mask = var_434_end_mask_0, x = var_230_cast_fp16)[name = tensor<string, []>("op_434_cast_fp16")];
+            tensor<int32, [4]> var_441_begin_0 = const()[name = tensor<string, []>("op_441_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_441_end_0 = const()[name = tensor<string, []>("op_441_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_441_end_mask_0 = const()[name = tensor<string, []>("op_441_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_441_cast_fp16 = slice_by_index(begin = var_441_begin_0, end = var_441_end_0, end_mask = var_441_end_mask_0, x = var_230_cast_fp16)[name = tensor<string, []>("op_441_cast_fp16")];
+            tensor<int32, [4]> var_448_begin_0 = const()[name = tensor<string, []>("op_448_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_448_end_0 = const()[name = tensor<string, []>("op_448_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_448_end_mask_0 = const()[name = tensor<string, []>("op_448_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = var_230_cast_fp16)[name = tensor<string, []>("op_448_cast_fp16")];
+            tensor<int32, [4]> var_455_begin_0 = const()[name = tensor<string, []>("op_455_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_455_end_0 = const()[name = tensor<string, []>("op_455_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_455_end_mask_0 = const()[name = tensor<string, []>("op_455_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = var_234_cast_fp16)[name = tensor<string, []>("op_455_cast_fp16")];
+            tensor<int32, [4]> var_462_begin_0 = const()[name = tensor<string, []>("op_462_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_462_end_0 = const()[name = tensor<string, []>("op_462_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_462_end_mask_0 = const()[name = tensor<string, []>("op_462_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_462_cast_fp16 = slice_by_index(begin = var_462_begin_0, end = var_462_end_0, end_mask = var_462_end_mask_0, x = var_234_cast_fp16)[name = tensor<string, []>("op_462_cast_fp16")];
+            tensor<int32, [4]> var_469_begin_0 = const()[name = tensor<string, []>("op_469_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_469_end_0 = const()[name = tensor<string, []>("op_469_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_469_end_mask_0 = const()[name = tensor<string, []>("op_469_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_469_cast_fp16 = slice_by_index(begin = var_469_begin_0, end = var_469_end_0, end_mask = var_469_end_mask_0, x = var_234_cast_fp16)[name = tensor<string, []>("op_469_cast_fp16")];
+            tensor<int32, [4]> var_476_begin_0 = const()[name = tensor<string, []>("op_476_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_476_end_0 = const()[name = tensor<string, []>("op_476_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_476_end_mask_0 = const()[name = tensor<string, []>("op_476_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_476_cast_fp16 = slice_by_index(begin = var_476_begin_0, end = var_476_end_0, end_mask = var_476_end_mask_0, x = var_234_cast_fp16)[name = tensor<string, []>("op_476_cast_fp16")];
+            tensor<int32, [4]> var_483_begin_0 = const()[name = tensor<string, []>("op_483_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_483_end_0 = const()[name = tensor<string, []>("op_483_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_483_end_mask_0 = const()[name = tensor<string, []>("op_483_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_483_cast_fp16 = slice_by_index(begin = var_483_begin_0, end = var_483_end_0, end_mask = var_483_end_mask_0, x = var_238_cast_fp16)[name = tensor<string, []>("op_483_cast_fp16")];
+            tensor<int32, [4]> var_490_begin_0 = const()[name = tensor<string, []>("op_490_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_490_end_0 = const()[name = tensor<string, []>("op_490_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_490_end_mask_0 = const()[name = tensor<string, []>("op_490_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = var_238_cast_fp16)[name = tensor<string, []>("op_490_cast_fp16")];
+            tensor<int32, [4]> var_497_begin_0 = const()[name = tensor<string, []>("op_497_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_497_end_0 = const()[name = tensor<string, []>("op_497_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_497_end_mask_0 = const()[name = tensor<string, []>("op_497_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_497_cast_fp16 = slice_by_index(begin = var_497_begin_0, end = var_497_end_0, end_mask = var_497_end_mask_0, x = var_238_cast_fp16)[name = tensor<string, []>("op_497_cast_fp16")];
+            tensor<int32, [4]> var_504_begin_0 = const()[name = tensor<string, []>("op_504_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_504_end_0 = const()[name = tensor<string, []>("op_504_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_504_end_mask_0 = const()[name = tensor<string, []>("op_504_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_504_cast_fp16 = slice_by_index(begin = var_504_begin_0, end = var_504_end_0, end_mask = var_504_end_mask_0, x = var_238_cast_fp16)[name = tensor<string, []>("op_504_cast_fp16")];
+            tensor<int32, [4]> var_511_begin_0 = const()[name = tensor<string, []>("op_511_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_511_end_0 = const()[name = tensor<string, []>("op_511_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_511_end_mask_0 = const()[name = tensor<string, []>("op_511_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_511_cast_fp16 = slice_by_index(begin = var_511_begin_0, end = var_511_end_0, end_mask = var_511_end_mask_0, x = var_242_cast_fp16)[name = tensor<string, []>("op_511_cast_fp16")];
+            tensor<int32, [4]> var_518_begin_0 = const()[name = tensor<string, []>("op_518_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_518_end_0 = const()[name = tensor<string, []>("op_518_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_518_end_mask_0 = const()[name = tensor<string, []>("op_518_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = var_242_cast_fp16)[name = tensor<string, []>("op_518_cast_fp16")];
+            tensor<int32, [4]> var_525_begin_0 = const()[name = tensor<string, []>("op_525_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_525_end_0 = const()[name = tensor<string, []>("op_525_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_525_end_mask_0 = const()[name = tensor<string, []>("op_525_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_525_cast_fp16 = slice_by_index(begin = var_525_begin_0, end = var_525_end_0, end_mask = var_525_end_mask_0, x = var_242_cast_fp16)[name = tensor<string, []>("op_525_cast_fp16")];
+            tensor<int32, [4]> var_532_begin_0 = const()[name = tensor<string, []>("op_532_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_532_end_0 = const()[name = tensor<string, []>("op_532_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_532_end_mask_0 = const()[name = tensor<string, []>("op_532_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_532_cast_fp16 = slice_by_index(begin = var_532_begin_0, end = var_532_end_0, end_mask = var_532_end_mask_0, x = var_242_cast_fp16)[name = tensor<string, []>("op_532_cast_fp16")];
+            tensor<int32, [4]> var_539_begin_0 = const()[name = tensor<string, []>("op_539_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_539_end_0 = const()[name = tensor<string, []>("op_539_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_539_end_mask_0 = const()[name = tensor<string, []>("op_539_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_539_cast_fp16 = slice_by_index(begin = var_539_begin_0, end = var_539_end_0, end_mask = var_539_end_mask_0, x = var_246_cast_fp16)[name = tensor<string, []>("op_539_cast_fp16")];
+            tensor<int32, [4]> var_546_begin_0 = const()[name = tensor<string, []>("op_546_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_546_end_0 = const()[name = tensor<string, []>("op_546_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_546_end_mask_0 = const()[name = tensor<string, []>("op_546_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = var_246_cast_fp16)[name = tensor<string, []>("op_546_cast_fp16")];
+            tensor<int32, [4]> var_553_begin_0 = const()[name = tensor<string, []>("op_553_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_553_end_0 = const()[name = tensor<string, []>("op_553_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_553_end_mask_0 = const()[name = tensor<string, []>("op_553_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = var_246_cast_fp16)[name = tensor<string, []>("op_553_cast_fp16")];
+            tensor<int32, [4]> var_560_begin_0 = const()[name = tensor<string, []>("op_560_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_560_end_0 = const()[name = tensor<string, []>("op_560_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_560_end_mask_0 = const()[name = tensor<string, []>("op_560_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_560_cast_fp16 = slice_by_index(begin = var_560_begin_0, end = var_560_end_0, end_mask = var_560_end_mask_0, x = var_246_cast_fp16)[name = tensor<string, []>("op_560_cast_fp16")];
+            tensor<int32, [4]> var_567_begin_0 = const()[name = tensor<string, []>("op_567_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_567_end_0 = const()[name = tensor<string, []>("op_567_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_567_end_mask_0 = const()[name = tensor<string, []>("op_567_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_567_cast_fp16 = slice_by_index(begin = var_567_begin_0, end = var_567_end_0, end_mask = var_567_end_mask_0, x = var_250_cast_fp16)[name = tensor<string, []>("op_567_cast_fp16")];
+            tensor<int32, [4]> var_574_begin_0 = const()[name = tensor<string, []>("op_574_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_574_end_0 = const()[name = tensor<string, []>("op_574_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_574_end_mask_0 = const()[name = tensor<string, []>("op_574_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = var_250_cast_fp16)[name = tensor<string, []>("op_574_cast_fp16")];
+            tensor<int32, [4]> var_581_begin_0 = const()[name = tensor<string, []>("op_581_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_581_end_0 = const()[name = tensor<string, []>("op_581_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_581_end_mask_0 = const()[name = tensor<string, []>("op_581_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_581_cast_fp16 = slice_by_index(begin = var_581_begin_0, end = var_581_end_0, end_mask = var_581_end_mask_0, x = var_250_cast_fp16)[name = tensor<string, []>("op_581_cast_fp16")];
+            tensor<int32, [4]> var_588_begin_0 = const()[name = tensor<string, []>("op_588_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_588_end_0 = const()[name = tensor<string, []>("op_588_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_588_end_mask_0 = const()[name = tensor<string, []>("op_588_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_588_cast_fp16 = slice_by_index(begin = var_588_begin_0, end = var_588_end_0, end_mask = var_588_end_mask_0, x = var_250_cast_fp16)[name = tensor<string, []>("op_588_cast_fp16")];
+            tensor<int32, [4]> k_1_perm_0 = const()[name = tensor<string, []>("k_1_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_593_begin_0 = const()[name = tensor<string, []>("op_593_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_593_end_0 = const()[name = tensor<string, []>("op_593_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_593_end_mask_0 = const()[name = tensor<string, []>("op_593_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_11 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 1500, 1, 64]> var_593_cast_fp16 = slice_by_index(begin = var_593_begin_0, end = var_593_end_0, end_mask = var_593_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_593_cast_fp16")];
+            tensor<int32, [4]> var_597_begin_0 = const()[name = tensor<string, []>("op_597_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_597_end_0 = const()[name = tensor<string, []>("op_597_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_597_end_mask_0 = const()[name = tensor<string, []>("op_597_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_597_cast_fp16 = slice_by_index(begin = var_597_begin_0, end = var_597_end_0, end_mask = var_597_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_597_cast_fp16")];
+            tensor<int32, [4]> var_601_begin_0 = const()[name = tensor<string, []>("op_601_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_601_end_0 = const()[name = tensor<string, []>("op_601_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_601_end_mask_0 = const()[name = tensor<string, []>("op_601_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_601_cast_fp16 = slice_by_index(begin = var_601_begin_0, end = var_601_end_0, end_mask = var_601_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_601_cast_fp16")];
+            tensor<int32, [4]> var_605_begin_0 = const()[name = tensor<string, []>("op_605_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_605_end_0 = const()[name = tensor<string, []>("op_605_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_605_end_mask_0 = const()[name = tensor<string, []>("op_605_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_605_cast_fp16 = slice_by_index(begin = var_605_begin_0, end = var_605_end_0, end_mask = var_605_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_605_cast_fp16")];
+            tensor<int32, [4]> var_609_begin_0 = const()[name = tensor<string, []>("op_609_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_609_end_0 = const()[name = tensor<string, []>("op_609_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_609_end_mask_0 = const()[name = tensor<string, []>("op_609_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_609_cast_fp16 = slice_by_index(begin = var_609_begin_0, end = var_609_end_0, end_mask = var_609_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_609_cast_fp16")];
+            tensor<int32, [4]> var_613_begin_0 = const()[name = tensor<string, []>("op_613_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_613_end_0 = const()[name = tensor<string, []>("op_613_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_613_end_mask_0 = const()[name = tensor<string, []>("op_613_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_613_cast_fp16 = slice_by_index(begin = var_613_begin_0, end = var_613_end_0, end_mask = var_613_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_613_cast_fp16")];
+            tensor<int32, [4]> var_617_begin_0 = const()[name = tensor<string, []>("op_617_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_617_end_0 = const()[name = tensor<string, []>("op_617_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_617_end_mask_0 = const()[name = tensor<string, []>("op_617_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_617_cast_fp16 = slice_by_index(begin = var_617_begin_0, end = var_617_end_0, end_mask = var_617_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_617_cast_fp16")];
+            tensor<int32, [4]> var_621_begin_0 = const()[name = tensor<string, []>("op_621_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_621_end_0 = const()[name = tensor<string, []>("op_621_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_621_end_mask_0 = const()[name = tensor<string, []>("op_621_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_621_cast_fp16 = slice_by_index(begin = var_621_begin_0, end = var_621_end_0, end_mask = var_621_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_621_cast_fp16")];
+            tensor<int32, [4]> var_625_begin_0 = const()[name = tensor<string, []>("op_625_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_625_end_0 = const()[name = tensor<string, []>("op_625_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_625_end_mask_0 = const()[name = tensor<string, []>("op_625_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_625_cast_fp16 = slice_by_index(begin = var_625_begin_0, end = var_625_end_0, end_mask = var_625_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_625_cast_fp16")];
+            tensor<int32, [4]> var_629_begin_0 = const()[name = tensor<string, []>("op_629_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_629_end_0 = const()[name = tensor<string, []>("op_629_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_629_end_mask_0 = const()[name = tensor<string, []>("op_629_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_629_cast_fp16 = slice_by_index(begin = var_629_begin_0, end = var_629_end_0, end_mask = var_629_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_629_cast_fp16")];
+            tensor<int32, [4]> var_633_begin_0 = const()[name = tensor<string, []>("op_633_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_633_end_0 = const()[name = tensor<string, []>("op_633_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_633_end_mask_0 = const()[name = tensor<string, []>("op_633_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_633_cast_fp16 = slice_by_index(begin = var_633_begin_0, end = var_633_end_0, end_mask = var_633_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_633_cast_fp16")];
+            tensor<int32, [4]> var_637_begin_0 = const()[name = tensor<string, []>("op_637_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_637_end_0 = const()[name = tensor<string, []>("op_637_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_637_end_mask_0 = const()[name = tensor<string, []>("op_637_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_637_cast_fp16 = slice_by_index(begin = var_637_begin_0, end = var_637_end_0, end_mask = var_637_end_mask_0, x = transpose_11)[name = tensor<string, []>("op_637_cast_fp16")];
+            tensor<int32, [4]> var_639_begin_0 = const()[name = tensor<string, []>("op_639_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_639_end_0 = const()[name = tensor<string, []>("op_639_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_639_end_mask_0 = const()[name = tensor<string, []>("op_639_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_639_cast_fp16 = slice_by_index(begin = var_639_begin_0, end = var_639_end_0, end_mask = var_639_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_639_cast_fp16")];
+            tensor<int32, [4]> var_643_begin_0 = const()[name = tensor<string, []>("op_643_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_643_end_0 = const()[name = tensor<string, []>("op_643_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_643_end_mask_0 = const()[name = tensor<string, []>("op_643_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16 = slice_by_index(begin = var_643_begin_0, end = var_643_end_0, end_mask = var_643_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_643_cast_fp16")];
+            tensor<int32, [4]> var_647_begin_0 = const()[name = tensor<string, []>("op_647_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_647_end_0 = const()[name = tensor<string, []>("op_647_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_647_end_mask_0 = const()[name = tensor<string, []>("op_647_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_647_cast_fp16 = slice_by_index(begin = var_647_begin_0, end = var_647_end_0, end_mask = var_647_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_647_cast_fp16")];
+            tensor<int32, [4]> var_651_begin_0 = const()[name = tensor<string, []>("op_651_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_651_end_0 = const()[name = tensor<string, []>("op_651_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_651_end_mask_0 = const()[name = tensor<string, []>("op_651_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_651_cast_fp16 = slice_by_index(begin = var_651_begin_0, end = var_651_end_0, end_mask = var_651_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_651_cast_fp16")];
+            tensor<int32, [4]> var_655_begin_0 = const()[name = tensor<string, []>("op_655_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_655_end_0 = const()[name = tensor<string, []>("op_655_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_655_end_mask_0 = const()[name = tensor<string, []>("op_655_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_655_cast_fp16 = slice_by_index(begin = var_655_begin_0, end = var_655_end_0, end_mask = var_655_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_655_cast_fp16")];
+            tensor<int32, [4]> var_659_begin_0 = const()[name = tensor<string, []>("op_659_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_659_end_0 = const()[name = tensor<string, []>("op_659_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_659_end_mask_0 = const()[name = tensor<string, []>("op_659_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_659_cast_fp16 = slice_by_index(begin = var_659_begin_0, end = var_659_end_0, end_mask = var_659_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_659_cast_fp16")];
+            tensor<int32, [4]> var_663_begin_0 = const()[name = tensor<string, []>("op_663_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_663_end_0 = const()[name = tensor<string, []>("op_663_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_663_end_mask_0 = const()[name = tensor<string, []>("op_663_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_663_cast_fp16 = slice_by_index(begin = var_663_begin_0, end = var_663_end_0, end_mask = var_663_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_663_cast_fp16")];
+            tensor<int32, [4]> var_667_begin_0 = const()[name = tensor<string, []>("op_667_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_667_end_0 = const()[name = tensor<string, []>("op_667_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_667_end_mask_0 = const()[name = tensor<string, []>("op_667_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_667_cast_fp16 = slice_by_index(begin = var_667_begin_0, end = var_667_end_0, end_mask = var_667_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_667_cast_fp16")];
+            tensor<int32, [4]> var_671_begin_0 = const()[name = tensor<string, []>("op_671_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_671_end_0 = const()[name = tensor<string, []>("op_671_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_671_end_mask_0 = const()[name = tensor<string, []>("op_671_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_671_cast_fp16 = slice_by_index(begin = var_671_begin_0, end = var_671_end_0, end_mask = var_671_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_671_cast_fp16")];
+            tensor<int32, [4]> var_675_begin_0 = const()[name = tensor<string, []>("op_675_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_675_end_0 = const()[name = tensor<string, []>("op_675_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_675_end_mask_0 = const()[name = tensor<string, []>("op_675_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_675_cast_fp16 = slice_by_index(begin = var_675_begin_0, end = var_675_end_0, end_mask = var_675_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_675_cast_fp16")];
+            tensor<int32, [4]> var_679_begin_0 = const()[name = tensor<string, []>("op_679_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_679_end_0 = const()[name = tensor<string, []>("op_679_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_679_end_mask_0 = const()[name = tensor<string, []>("op_679_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_679_cast_fp16 = slice_by_index(begin = var_679_begin_0, end = var_679_end_0, end_mask = var_679_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_679_cast_fp16")];
+            tensor<int32, [4]> var_683_begin_0 = const()[name = tensor<string, []>("op_683_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_683_end_0 = const()[name = tensor<string, []>("op_683_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_683_end_mask_0 = const()[name = tensor<string, []>("op_683_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_683_cast_fp16 = slice_by_index(begin = var_683_begin_0, end = var_683_end_0, end_mask = var_683_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_683_cast_fp16")];
+            tensor<string, []> var_687_equation_0 = const()[name = tensor<string, []>("op_687_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_687_cast_fp16 = einsum(equation = var_687_equation_0, values = (var_593_cast_fp16, var_259_cast_fp16))[name = tensor<string, []>("op_687_cast_fp16")];
+            tensor<fp16, []> var_688_to_fp16 = const()[name = tensor<string, []>("op_688_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1_cast_fp16 = mul(x = var_687_cast_fp16, y = var_688_to_fp16)[name = tensor<string, []>("aw_chunk_1_cast_fp16")];
+            tensor<string, []> var_691_equation_0 = const()[name = tensor<string, []>("op_691_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_691_cast_fp16 = einsum(equation = var_691_equation_0, values = (var_593_cast_fp16, var_266_cast_fp16))[name = tensor<string, []>("op_691_cast_fp16")];
+            tensor<fp16, []> var_692_to_fp16 = const()[name = tensor<string, []>("op_692_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_3_cast_fp16 = mul(x = var_691_cast_fp16, y = var_692_to_fp16)[name = tensor<string, []>("aw_chunk_3_cast_fp16")];
+            tensor<string, []> var_695_equation_0 = const()[name = tensor<string, []>("op_695_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_695_cast_fp16 = einsum(equation = var_695_equation_0, values = (var_593_cast_fp16, var_273_cast_fp16))[name = tensor<string, []>("op_695_cast_fp16")];
+            tensor<fp16, []> var_696_to_fp16 = const()[name = tensor<string, []>("op_696_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_5_cast_fp16 = mul(x = var_695_cast_fp16, y = var_696_to_fp16)[name = tensor<string, []>("aw_chunk_5_cast_fp16")];
+            tensor<string, []> var_699_equation_0 = const()[name = tensor<string, []>("op_699_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_699_cast_fp16 = einsum(equation = var_699_equation_0, values = (var_593_cast_fp16, var_280_cast_fp16))[name = tensor<string, []>("op_699_cast_fp16")];
+            tensor<fp16, []> var_700_to_fp16 = const()[name = tensor<string, []>("op_700_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_7_cast_fp16 = mul(x = var_699_cast_fp16, y = var_700_to_fp16)[name = tensor<string, []>("aw_chunk_7_cast_fp16")];
+            tensor<string, []> var_703_equation_0 = const()[name = tensor<string, []>("op_703_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_703_cast_fp16 = einsum(equation = var_703_equation_0, values = (var_597_cast_fp16, var_287_cast_fp16))[name = tensor<string, []>("op_703_cast_fp16")];
+            tensor<fp16, []> var_704_to_fp16 = const()[name = tensor<string, []>("op_704_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_9_cast_fp16 = mul(x = var_703_cast_fp16, y = var_704_to_fp16)[name = tensor<string, []>("aw_chunk_9_cast_fp16")];
+            tensor<string, []> var_707_equation_0 = const()[name = tensor<string, []>("op_707_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_707_cast_fp16 = einsum(equation = var_707_equation_0, values = (var_597_cast_fp16, var_294_cast_fp16))[name = tensor<string, []>("op_707_cast_fp16")];
+            tensor<fp16, []> var_708_to_fp16 = const()[name = tensor<string, []>("op_708_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_11_cast_fp16 = mul(x = var_707_cast_fp16, y = var_708_to_fp16)[name = tensor<string, []>("aw_chunk_11_cast_fp16")];
+            tensor<string, []> var_711_equation_0 = const()[name = tensor<string, []>("op_711_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_711_cast_fp16 = einsum(equation = var_711_equation_0, values = (var_597_cast_fp16, var_301_cast_fp16))[name = tensor<string, []>("op_711_cast_fp16")];
+            tensor<fp16, []> var_712_to_fp16 = const()[name = tensor<string, []>("op_712_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_13_cast_fp16 = mul(x = var_711_cast_fp16, y = var_712_to_fp16)[name = tensor<string, []>("aw_chunk_13_cast_fp16")];
+            tensor<string, []> var_715_equation_0 = const()[name = tensor<string, []>("op_715_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_715_cast_fp16 = einsum(equation = var_715_equation_0, values = (var_597_cast_fp16, var_308_cast_fp16))[name = tensor<string, []>("op_715_cast_fp16")];
+            tensor<fp16, []> var_716_to_fp16 = const()[name = tensor<string, []>("op_716_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_15_cast_fp16 = mul(x = var_715_cast_fp16, y = var_716_to_fp16)[name = tensor<string, []>("aw_chunk_15_cast_fp16")];
+            tensor<string, []> var_719_equation_0 = const()[name = tensor<string, []>("op_719_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_719_cast_fp16 = einsum(equation = var_719_equation_0, values = (var_601_cast_fp16, var_315_cast_fp16))[name = tensor<string, []>("op_719_cast_fp16")];
+            tensor<fp16, []> var_720_to_fp16 = const()[name = tensor<string, []>("op_720_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_17_cast_fp16 = mul(x = var_719_cast_fp16, y = var_720_to_fp16)[name = tensor<string, []>("aw_chunk_17_cast_fp16")];
+            tensor<string, []> var_723_equation_0 = const()[name = tensor<string, []>("op_723_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_723_cast_fp16 = einsum(equation = var_723_equation_0, values = (var_601_cast_fp16, var_322_cast_fp16))[name = tensor<string, []>("op_723_cast_fp16")];
+            tensor<fp16, []> var_724_to_fp16 = const()[name = tensor<string, []>("op_724_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_19_cast_fp16 = mul(x = var_723_cast_fp16, y = var_724_to_fp16)[name = tensor<string, []>("aw_chunk_19_cast_fp16")];
+            tensor<string, []> var_727_equation_0 = const()[name = tensor<string, []>("op_727_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_727_cast_fp16 = einsum(equation = var_727_equation_0, values = (var_601_cast_fp16, var_329_cast_fp16))[name = tensor<string, []>("op_727_cast_fp16")];
+            tensor<fp16, []> var_728_to_fp16 = const()[name = tensor<string, []>("op_728_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_21_cast_fp16 = mul(x = var_727_cast_fp16, y = var_728_to_fp16)[name = tensor<string, []>("aw_chunk_21_cast_fp16")];
+            tensor<string, []> var_731_equation_0 = const()[name = tensor<string, []>("op_731_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_731_cast_fp16 = einsum(equation = var_731_equation_0, values = (var_601_cast_fp16, var_336_cast_fp16))[name = tensor<string, []>("op_731_cast_fp16")];
+            tensor<fp16, []> var_732_to_fp16 = const()[name = tensor<string, []>("op_732_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_23_cast_fp16 = mul(x = var_731_cast_fp16, y = var_732_to_fp16)[name = tensor<string, []>("aw_chunk_23_cast_fp16")];
+            tensor<string, []> var_735_equation_0 = const()[name = tensor<string, []>("op_735_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_735_cast_fp16 = einsum(equation = var_735_equation_0, values = (var_605_cast_fp16, var_343_cast_fp16))[name = tensor<string, []>("op_735_cast_fp16")];
+            tensor<fp16, []> var_736_to_fp16 = const()[name = tensor<string, []>("op_736_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_25_cast_fp16 = mul(x = var_735_cast_fp16, y = var_736_to_fp16)[name = tensor<string, []>("aw_chunk_25_cast_fp16")];
+            tensor<string, []> var_739_equation_0 = const()[name = tensor<string, []>("op_739_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_739_cast_fp16 = einsum(equation = var_739_equation_0, values = (var_605_cast_fp16, var_350_cast_fp16))[name = tensor<string, []>("op_739_cast_fp16")];
+            tensor<fp16, []> var_740_to_fp16 = const()[name = tensor<string, []>("op_740_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_27_cast_fp16 = mul(x = var_739_cast_fp16, y = var_740_to_fp16)[name = tensor<string, []>("aw_chunk_27_cast_fp16")];
+            tensor<string, []> var_743_equation_0 = const()[name = tensor<string, []>("op_743_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_743_cast_fp16 = einsum(equation = var_743_equation_0, values = (var_605_cast_fp16, var_357_cast_fp16))[name = tensor<string, []>("op_743_cast_fp16")];
+            tensor<fp16, []> var_744_to_fp16 = const()[name = tensor<string, []>("op_744_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_29_cast_fp16 = mul(x = var_743_cast_fp16, y = var_744_to_fp16)[name = tensor<string, []>("aw_chunk_29_cast_fp16")];
+            tensor<string, []> var_747_equation_0 = const()[name = tensor<string, []>("op_747_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_747_cast_fp16 = einsum(equation = var_747_equation_0, values = (var_605_cast_fp16, var_364_cast_fp16))[name = tensor<string, []>("op_747_cast_fp16")];
+            tensor<fp16, []> var_748_to_fp16 = const()[name = tensor<string, []>("op_748_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_31_cast_fp16 = mul(x = var_747_cast_fp16, y = var_748_to_fp16)[name = tensor<string, []>("aw_chunk_31_cast_fp16")];
+            tensor<string, []> var_751_equation_0 = const()[name = tensor<string, []>("op_751_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_751_cast_fp16 = einsum(equation = var_751_equation_0, values = (var_609_cast_fp16, var_371_cast_fp16))[name = tensor<string, []>("op_751_cast_fp16")];
+            tensor<fp16, []> var_752_to_fp16 = const()[name = tensor<string, []>("op_752_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_33_cast_fp16 = mul(x = var_751_cast_fp16, y = var_752_to_fp16)[name = tensor<string, []>("aw_chunk_33_cast_fp16")];
+            tensor<string, []> var_755_equation_0 = const()[name = tensor<string, []>("op_755_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_755_cast_fp16 = einsum(equation = var_755_equation_0, values = (var_609_cast_fp16, var_378_cast_fp16))[name = tensor<string, []>("op_755_cast_fp16")];
+            tensor<fp16, []> var_756_to_fp16 = const()[name = tensor<string, []>("op_756_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_35_cast_fp16 = mul(x = var_755_cast_fp16, y = var_756_to_fp16)[name = tensor<string, []>("aw_chunk_35_cast_fp16")];
+            tensor<string, []> var_759_equation_0 = const()[name = tensor<string, []>("op_759_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_759_cast_fp16 = einsum(equation = var_759_equation_0, values = (var_609_cast_fp16, var_385_cast_fp16))[name = tensor<string, []>("op_759_cast_fp16")];
+            tensor<fp16, []> var_760_to_fp16 = const()[name = tensor<string, []>("op_760_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_37_cast_fp16 = mul(x = var_759_cast_fp16, y = var_760_to_fp16)[name = tensor<string, []>("aw_chunk_37_cast_fp16")];
+            tensor<string, []> var_763_equation_0 = const()[name = tensor<string, []>("op_763_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_763_cast_fp16 = einsum(equation = var_763_equation_0, values = (var_609_cast_fp16, var_392_cast_fp16))[name = tensor<string, []>("op_763_cast_fp16")];
+            tensor<fp16, []> var_764_to_fp16 = const()[name = tensor<string, []>("op_764_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_39_cast_fp16 = mul(x = var_763_cast_fp16, y = var_764_to_fp16)[name = tensor<string, []>("aw_chunk_39_cast_fp16")];
+            tensor<string, []> var_767_equation_0 = const()[name = tensor<string, []>("op_767_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_767_cast_fp16 = einsum(equation = var_767_equation_0, values = (var_613_cast_fp16, var_399_cast_fp16))[name = tensor<string, []>("op_767_cast_fp16")];
+            tensor<fp16, []> var_768_to_fp16 = const()[name = tensor<string, []>("op_768_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_41_cast_fp16 = mul(x = var_767_cast_fp16, y = var_768_to_fp16)[name = tensor<string, []>("aw_chunk_41_cast_fp16")];
+            tensor<string, []> var_771_equation_0 = const()[name = tensor<string, []>("op_771_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_771_cast_fp16 = einsum(equation = var_771_equation_0, values = (var_613_cast_fp16, var_406_cast_fp16))[name = tensor<string, []>("op_771_cast_fp16")];
+            tensor<fp16, []> var_772_to_fp16 = const()[name = tensor<string, []>("op_772_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_43_cast_fp16 = mul(x = var_771_cast_fp16, y = var_772_to_fp16)[name = tensor<string, []>("aw_chunk_43_cast_fp16")];
+            tensor<string, []> var_775_equation_0 = const()[name = tensor<string, []>("op_775_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_775_cast_fp16 = einsum(equation = var_775_equation_0, values = (var_613_cast_fp16, var_413_cast_fp16))[name = tensor<string, []>("op_775_cast_fp16")];
+            tensor<fp16, []> var_776_to_fp16 = const()[name = tensor<string, []>("op_776_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_45_cast_fp16 = mul(x = var_775_cast_fp16, y = var_776_to_fp16)[name = tensor<string, []>("aw_chunk_45_cast_fp16")];
+            tensor<string, []> var_779_equation_0 = const()[name = tensor<string, []>("op_779_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_779_cast_fp16 = einsum(equation = var_779_equation_0, values = (var_613_cast_fp16, var_420_cast_fp16))[name = tensor<string, []>("op_779_cast_fp16")];
+            tensor<fp16, []> var_780_to_fp16 = const()[name = tensor<string, []>("op_780_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_47_cast_fp16 = mul(x = var_779_cast_fp16, y = var_780_to_fp16)[name = tensor<string, []>("aw_chunk_47_cast_fp16")];
+            tensor<string, []> var_783_equation_0 = const()[name = tensor<string, []>("op_783_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_783_cast_fp16 = einsum(equation = var_783_equation_0, values = (var_617_cast_fp16, var_427_cast_fp16))[name = tensor<string, []>("op_783_cast_fp16")];
+            tensor<fp16, []> var_784_to_fp16 = const()[name = tensor<string, []>("op_784_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_49_cast_fp16 = mul(x = var_783_cast_fp16, y = var_784_to_fp16)[name = tensor<string, []>("aw_chunk_49_cast_fp16")];
+            tensor<string, []> var_787_equation_0 = const()[name = tensor<string, []>("op_787_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_787_cast_fp16 = einsum(equation = var_787_equation_0, values = (var_617_cast_fp16, var_434_cast_fp16))[name = tensor<string, []>("op_787_cast_fp16")];
+            tensor<fp16, []> var_788_to_fp16 = const()[name = tensor<string, []>("op_788_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_51_cast_fp16 = mul(x = var_787_cast_fp16, y = var_788_to_fp16)[name = tensor<string, []>("aw_chunk_51_cast_fp16")];
+            tensor<string, []> var_791_equation_0 = const()[name = tensor<string, []>("op_791_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_791_cast_fp16 = einsum(equation = var_791_equation_0, values = (var_617_cast_fp16, var_441_cast_fp16))[name = tensor<string, []>("op_791_cast_fp16")];
+            tensor<fp16, []> var_792_to_fp16 = const()[name = tensor<string, []>("op_792_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_53_cast_fp16 = mul(x = var_791_cast_fp16, y = var_792_to_fp16)[name = tensor<string, []>("aw_chunk_53_cast_fp16")];
+            tensor<string, []> var_795_equation_0 = const()[name = tensor<string, []>("op_795_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_795_cast_fp16 = einsum(equation = var_795_equation_0, values = (var_617_cast_fp16, var_448_cast_fp16))[name = tensor<string, []>("op_795_cast_fp16")];
+            tensor<fp16, []> var_796_to_fp16 = const()[name = tensor<string, []>("op_796_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_55_cast_fp16 = mul(x = var_795_cast_fp16, y = var_796_to_fp16)[name = tensor<string, []>("aw_chunk_55_cast_fp16")];
+            tensor<string, []> var_799_equation_0 = const()[name = tensor<string, []>("op_799_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_799_cast_fp16 = einsum(equation = var_799_equation_0, values = (var_621_cast_fp16, var_455_cast_fp16))[name = tensor<string, []>("op_799_cast_fp16")];
+            tensor<fp16, []> var_800_to_fp16 = const()[name = tensor<string, []>("op_800_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_57_cast_fp16 = mul(x = var_799_cast_fp16, y = var_800_to_fp16)[name = tensor<string, []>("aw_chunk_57_cast_fp16")];
+            tensor<string, []> var_803_equation_0 = const()[name = tensor<string, []>("op_803_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_803_cast_fp16 = einsum(equation = var_803_equation_0, values = (var_621_cast_fp16, var_462_cast_fp16))[name = tensor<string, []>("op_803_cast_fp16")];
+            tensor<fp16, []> var_804_to_fp16 = const()[name = tensor<string, []>("op_804_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_59_cast_fp16 = mul(x = var_803_cast_fp16, y = var_804_to_fp16)[name = tensor<string, []>("aw_chunk_59_cast_fp16")];
+            tensor<string, []> var_807_equation_0 = const()[name = tensor<string, []>("op_807_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_807_cast_fp16 = einsum(equation = var_807_equation_0, values = (var_621_cast_fp16, var_469_cast_fp16))[name = tensor<string, []>("op_807_cast_fp16")];
+            tensor<fp16, []> var_808_to_fp16 = const()[name = tensor<string, []>("op_808_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_61_cast_fp16 = mul(x = var_807_cast_fp16, y = var_808_to_fp16)[name = tensor<string, []>("aw_chunk_61_cast_fp16")];
+            tensor<string, []> var_811_equation_0 = const()[name = tensor<string, []>("op_811_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_811_cast_fp16 = einsum(equation = var_811_equation_0, values = (var_621_cast_fp16, var_476_cast_fp16))[name = tensor<string, []>("op_811_cast_fp16")];
+            tensor<fp16, []> var_812_to_fp16 = const()[name = tensor<string, []>("op_812_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_63_cast_fp16 = mul(x = var_811_cast_fp16, y = var_812_to_fp16)[name = tensor<string, []>("aw_chunk_63_cast_fp16")];
+            tensor<string, []> var_815_equation_0 = const()[name = tensor<string, []>("op_815_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_815_cast_fp16 = einsum(equation = var_815_equation_0, values = (var_625_cast_fp16, var_483_cast_fp16))[name = tensor<string, []>("op_815_cast_fp16")];
+            tensor<fp16, []> var_816_to_fp16 = const()[name = tensor<string, []>("op_816_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_65_cast_fp16 = mul(x = var_815_cast_fp16, y = var_816_to_fp16)[name = tensor<string, []>("aw_chunk_65_cast_fp16")];
+            tensor<string, []> var_819_equation_0 = const()[name = tensor<string, []>("op_819_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_819_cast_fp16 = einsum(equation = var_819_equation_0, values = (var_625_cast_fp16, var_490_cast_fp16))[name = tensor<string, []>("op_819_cast_fp16")];
+            tensor<fp16, []> var_820_to_fp16 = const()[name = tensor<string, []>("op_820_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_67_cast_fp16 = mul(x = var_819_cast_fp16, y = var_820_to_fp16)[name = tensor<string, []>("aw_chunk_67_cast_fp16")];
+            tensor<string, []> var_823_equation_0 = const()[name = tensor<string, []>("op_823_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_823_cast_fp16 = einsum(equation = var_823_equation_0, values = (var_625_cast_fp16, var_497_cast_fp16))[name = tensor<string, []>("op_823_cast_fp16")];
+            tensor<fp16, []> var_824_to_fp16 = const()[name = tensor<string, []>("op_824_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_69_cast_fp16 = mul(x = var_823_cast_fp16, y = var_824_to_fp16)[name = tensor<string, []>("aw_chunk_69_cast_fp16")];
+            tensor<string, []> var_827_equation_0 = const()[name = tensor<string, []>("op_827_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_827_cast_fp16 = einsum(equation = var_827_equation_0, values = (var_625_cast_fp16, var_504_cast_fp16))[name = tensor<string, []>("op_827_cast_fp16")];
+            tensor<fp16, []> var_828_to_fp16 = const()[name = tensor<string, []>("op_828_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_71_cast_fp16 = mul(x = var_827_cast_fp16, y = var_828_to_fp16)[name = tensor<string, []>("aw_chunk_71_cast_fp16")];
+            tensor<string, []> var_831_equation_0 = const()[name = tensor<string, []>("op_831_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_831_cast_fp16 = einsum(equation = var_831_equation_0, values = (var_629_cast_fp16, var_511_cast_fp16))[name = tensor<string, []>("op_831_cast_fp16")];
+            tensor<fp16, []> var_832_to_fp16 = const()[name = tensor<string, []>("op_832_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_73_cast_fp16 = mul(x = var_831_cast_fp16, y = var_832_to_fp16)[name = tensor<string, []>("aw_chunk_73_cast_fp16")];
+            tensor<string, []> var_835_equation_0 = const()[name = tensor<string, []>("op_835_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_835_cast_fp16 = einsum(equation = var_835_equation_0, values = (var_629_cast_fp16, var_518_cast_fp16))[name = tensor<string, []>("op_835_cast_fp16")];
+            tensor<fp16, []> var_836_to_fp16 = const()[name = tensor<string, []>("op_836_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_75_cast_fp16 = mul(x = var_835_cast_fp16, y = var_836_to_fp16)[name = tensor<string, []>("aw_chunk_75_cast_fp16")];
+            tensor<string, []> var_839_equation_0 = const()[name = tensor<string, []>("op_839_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_839_cast_fp16 = einsum(equation = var_839_equation_0, values = (var_629_cast_fp16, var_525_cast_fp16))[name = tensor<string, []>("op_839_cast_fp16")];
+            tensor<fp16, []> var_840_to_fp16 = const()[name = tensor<string, []>("op_840_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_77_cast_fp16 = mul(x = var_839_cast_fp16, y = var_840_to_fp16)[name = tensor<string, []>("aw_chunk_77_cast_fp16")];
+            tensor<string, []> var_843_equation_0 = const()[name = tensor<string, []>("op_843_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_843_cast_fp16 = einsum(equation = var_843_equation_0, values = (var_629_cast_fp16, var_532_cast_fp16))[name = tensor<string, []>("op_843_cast_fp16")];
+            tensor<fp16, []> var_844_to_fp16 = const()[name = tensor<string, []>("op_844_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_79_cast_fp16 = mul(x = var_843_cast_fp16, y = var_844_to_fp16)[name = tensor<string, []>("aw_chunk_79_cast_fp16")];
+            tensor<string, []> var_847_equation_0 = const()[name = tensor<string, []>("op_847_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_847_cast_fp16 = einsum(equation = var_847_equation_0, values = (var_633_cast_fp16, var_539_cast_fp16))[name = tensor<string, []>("op_847_cast_fp16")];
+            tensor<fp16, []> var_848_to_fp16 = const()[name = tensor<string, []>("op_848_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_81_cast_fp16 = mul(x = var_847_cast_fp16, y = var_848_to_fp16)[name = tensor<string, []>("aw_chunk_81_cast_fp16")];
+            tensor<string, []> var_851_equation_0 = const()[name = tensor<string, []>("op_851_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_851_cast_fp16 = einsum(equation = var_851_equation_0, values = (var_633_cast_fp16, var_546_cast_fp16))[name = tensor<string, []>("op_851_cast_fp16")];
+            tensor<fp16, []> var_852_to_fp16 = const()[name = tensor<string, []>("op_852_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_83_cast_fp16 = mul(x = var_851_cast_fp16, y = var_852_to_fp16)[name = tensor<string, []>("aw_chunk_83_cast_fp16")];
+            tensor<string, []> var_855_equation_0 = const()[name = tensor<string, []>("op_855_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_855_cast_fp16 = einsum(equation = var_855_equation_0, values = (var_633_cast_fp16, var_553_cast_fp16))[name = tensor<string, []>("op_855_cast_fp16")];
+            tensor<fp16, []> var_856_to_fp16 = const()[name = tensor<string, []>("op_856_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_85_cast_fp16 = mul(x = var_855_cast_fp16, y = var_856_to_fp16)[name = tensor<string, []>("aw_chunk_85_cast_fp16")];
+            tensor<string, []> var_859_equation_0 = const()[name = tensor<string, []>("op_859_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_859_cast_fp16 = einsum(equation = var_859_equation_0, values = (var_633_cast_fp16, var_560_cast_fp16))[name = tensor<string, []>("op_859_cast_fp16")];
+            tensor<fp16, []> var_860_to_fp16 = const()[name = tensor<string, []>("op_860_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_87_cast_fp16 = mul(x = var_859_cast_fp16, y = var_860_to_fp16)[name = tensor<string, []>("aw_chunk_87_cast_fp16")];
+            tensor<string, []> var_863_equation_0 = const()[name = tensor<string, []>("op_863_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_863_cast_fp16 = einsum(equation = var_863_equation_0, values = (var_637_cast_fp16, var_567_cast_fp16))[name = tensor<string, []>("op_863_cast_fp16")];
+            tensor<fp16, []> var_864_to_fp16 = const()[name = tensor<string, []>("op_864_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_89_cast_fp16 = mul(x = var_863_cast_fp16, y = var_864_to_fp16)[name = tensor<string, []>("aw_chunk_89_cast_fp16")];
+            tensor<string, []> var_867_equation_0 = const()[name = tensor<string, []>("op_867_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_867_cast_fp16 = einsum(equation = var_867_equation_0, values = (var_637_cast_fp16, var_574_cast_fp16))[name = tensor<string, []>("op_867_cast_fp16")];
+            tensor<fp16, []> var_868_to_fp16 = const()[name = tensor<string, []>("op_868_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_91_cast_fp16 = mul(x = var_867_cast_fp16, y = var_868_to_fp16)[name = tensor<string, []>("aw_chunk_91_cast_fp16")];
+            tensor<string, []> var_871_equation_0 = const()[name = tensor<string, []>("op_871_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_871_cast_fp16 = einsum(equation = var_871_equation_0, values = (var_637_cast_fp16, var_581_cast_fp16))[name = tensor<string, []>("op_871_cast_fp16")];
+            tensor<fp16, []> var_872_to_fp16 = const()[name = tensor<string, []>("op_872_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_93_cast_fp16 = mul(x = var_871_cast_fp16, y = var_872_to_fp16)[name = tensor<string, []>("aw_chunk_93_cast_fp16")];
+            tensor<string, []> var_875_equation_0 = const()[name = tensor<string, []>("op_875_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_875_cast_fp16 = einsum(equation = var_875_equation_0, values = (var_637_cast_fp16, var_588_cast_fp16))[name = tensor<string, []>("op_875_cast_fp16")];
+            tensor<fp16, []> var_876_to_fp16 = const()[name = tensor<string, []>("op_876_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_95_cast_fp16 = mul(x = var_875_cast_fp16, y = var_876_to_fp16)[name = tensor<string, []>("aw_chunk_95_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_878_cast_fp16 = softmax(axis = var_151, x = aw_chunk_1_cast_fp16)[name = tensor<string, []>("op_878_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_879_cast_fp16 = softmax(axis = var_151, x = aw_chunk_3_cast_fp16)[name = tensor<string, []>("op_879_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_880_cast_fp16 = softmax(axis = var_151, x = aw_chunk_5_cast_fp16)[name = tensor<string, []>("op_880_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_881_cast_fp16 = softmax(axis = var_151, x = aw_chunk_7_cast_fp16)[name = tensor<string, []>("op_881_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_882_cast_fp16 = softmax(axis = var_151, x = aw_chunk_9_cast_fp16)[name = tensor<string, []>("op_882_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_883_cast_fp16 = softmax(axis = var_151, x = aw_chunk_11_cast_fp16)[name = tensor<string, []>("op_883_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_884_cast_fp16 = softmax(axis = var_151, x = aw_chunk_13_cast_fp16)[name = tensor<string, []>("op_884_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_885_cast_fp16 = softmax(axis = var_151, x = aw_chunk_15_cast_fp16)[name = tensor<string, []>("op_885_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_886_cast_fp16 = softmax(axis = var_151, x = aw_chunk_17_cast_fp16)[name = tensor<string, []>("op_886_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_887_cast_fp16 = softmax(axis = var_151, x = aw_chunk_19_cast_fp16)[name = tensor<string, []>("op_887_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_888_cast_fp16 = softmax(axis = var_151, x = aw_chunk_21_cast_fp16)[name = tensor<string, []>("op_888_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_889_cast_fp16 = softmax(axis = var_151, x = aw_chunk_23_cast_fp16)[name = tensor<string, []>("op_889_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_890_cast_fp16 = softmax(axis = var_151, x = aw_chunk_25_cast_fp16)[name = tensor<string, []>("op_890_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_891_cast_fp16 = softmax(axis = var_151, x = aw_chunk_27_cast_fp16)[name = tensor<string, []>("op_891_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_892_cast_fp16 = softmax(axis = var_151, x = aw_chunk_29_cast_fp16)[name = tensor<string, []>("op_892_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_893_cast_fp16 = softmax(axis = var_151, x = aw_chunk_31_cast_fp16)[name = tensor<string, []>("op_893_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_894_cast_fp16 = softmax(axis = var_151, x = aw_chunk_33_cast_fp16)[name = tensor<string, []>("op_894_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_895_cast_fp16 = softmax(axis = var_151, x = aw_chunk_35_cast_fp16)[name = tensor<string, []>("op_895_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_896_cast_fp16 = softmax(axis = var_151, x = aw_chunk_37_cast_fp16)[name = tensor<string, []>("op_896_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_897_cast_fp16 = softmax(axis = var_151, x = aw_chunk_39_cast_fp16)[name = tensor<string, []>("op_897_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_898_cast_fp16 = softmax(axis = var_151, x = aw_chunk_41_cast_fp16)[name = tensor<string, []>("op_898_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_899_cast_fp16 = softmax(axis = var_151, x = aw_chunk_43_cast_fp16)[name = tensor<string, []>("op_899_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_900_cast_fp16 = softmax(axis = var_151, x = aw_chunk_45_cast_fp16)[name = tensor<string, []>("op_900_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_901_cast_fp16 = softmax(axis = var_151, x = aw_chunk_47_cast_fp16)[name = tensor<string, []>("op_901_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_902_cast_fp16 = softmax(axis = var_151, x = aw_chunk_49_cast_fp16)[name = tensor<string, []>("op_902_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_903_cast_fp16 = softmax(axis = var_151, x = aw_chunk_51_cast_fp16)[name = tensor<string, []>("op_903_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_904_cast_fp16 = softmax(axis = var_151, x = aw_chunk_53_cast_fp16)[name = tensor<string, []>("op_904_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_905_cast_fp16 = softmax(axis = var_151, x = aw_chunk_55_cast_fp16)[name = tensor<string, []>("op_905_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_906_cast_fp16 = softmax(axis = var_151, x = aw_chunk_57_cast_fp16)[name = tensor<string, []>("op_906_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_907_cast_fp16 = softmax(axis = var_151, x = aw_chunk_59_cast_fp16)[name = tensor<string, []>("op_907_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_908_cast_fp16 = softmax(axis = var_151, x = aw_chunk_61_cast_fp16)[name = tensor<string, []>("op_908_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_909_cast_fp16 = softmax(axis = var_151, x = aw_chunk_63_cast_fp16)[name = tensor<string, []>("op_909_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_910_cast_fp16 = softmax(axis = var_151, x = aw_chunk_65_cast_fp16)[name = tensor<string, []>("op_910_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_911_cast_fp16 = softmax(axis = var_151, x = aw_chunk_67_cast_fp16)[name = tensor<string, []>("op_911_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_912_cast_fp16 = softmax(axis = var_151, x = aw_chunk_69_cast_fp16)[name = tensor<string, []>("op_912_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_913_cast_fp16 = softmax(axis = var_151, x = aw_chunk_71_cast_fp16)[name = tensor<string, []>("op_913_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_914_cast_fp16 = softmax(axis = var_151, x = aw_chunk_73_cast_fp16)[name = tensor<string, []>("op_914_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_915_cast_fp16 = softmax(axis = var_151, x = aw_chunk_75_cast_fp16)[name = tensor<string, []>("op_915_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_916_cast_fp16 = softmax(axis = var_151, x = aw_chunk_77_cast_fp16)[name = tensor<string, []>("op_916_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_917_cast_fp16 = softmax(axis = var_151, x = aw_chunk_79_cast_fp16)[name = tensor<string, []>("op_917_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_918_cast_fp16 = softmax(axis = var_151, x = aw_chunk_81_cast_fp16)[name = tensor<string, []>("op_918_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_919_cast_fp16 = softmax(axis = var_151, x = aw_chunk_83_cast_fp16)[name = tensor<string, []>("op_919_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_920_cast_fp16 = softmax(axis = var_151, x = aw_chunk_85_cast_fp16)[name = tensor<string, []>("op_920_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_921_cast_fp16 = softmax(axis = var_151, x = aw_chunk_87_cast_fp16)[name = tensor<string, []>("op_921_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_922_cast_fp16 = softmax(axis = var_151, x = aw_chunk_89_cast_fp16)[name = tensor<string, []>("op_922_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_923_cast_fp16 = softmax(axis = var_151, x = aw_chunk_91_cast_fp16)[name = tensor<string, []>("op_923_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_924_cast_fp16 = softmax(axis = var_151, x = aw_chunk_93_cast_fp16)[name = tensor<string, []>("op_924_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_925_cast_fp16 = softmax(axis = var_151, x = aw_chunk_95_cast_fp16)[name = tensor<string, []>("op_925_cast_fp16")];
+            tensor<string, []> var_927_equation_0 = const()[name = tensor<string, []>("op_927_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_927_cast_fp16 = einsum(equation = var_927_equation_0, values = (var_639_cast_fp16, var_878_cast_fp16))[name = tensor<string, []>("op_927_cast_fp16")];
+            tensor<string, []> var_929_equation_0 = const()[name = tensor<string, []>("op_929_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_929_cast_fp16 = einsum(equation = var_929_equation_0, values = (var_639_cast_fp16, var_879_cast_fp16))[name = tensor<string, []>("op_929_cast_fp16")];
+            tensor<string, []> var_931_equation_0 = const()[name = tensor<string, []>("op_931_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_931_cast_fp16 = einsum(equation = var_931_equation_0, values = (var_639_cast_fp16, var_880_cast_fp16))[name = tensor<string, []>("op_931_cast_fp16")];
+            tensor<string, []> var_933_equation_0 = const()[name = tensor<string, []>("op_933_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_933_cast_fp16 = einsum(equation = var_933_equation_0, values = (var_639_cast_fp16, var_881_cast_fp16))[name = tensor<string, []>("op_933_cast_fp16")];
+            tensor<string, []> var_935_equation_0 = const()[name = tensor<string, []>("op_935_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_935_cast_fp16 = einsum(equation = var_935_equation_0, values = (var_643_cast_fp16, var_882_cast_fp16))[name = tensor<string, []>("op_935_cast_fp16")];
+            tensor<string, []> var_937_equation_0 = const()[name = tensor<string, []>("op_937_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_937_cast_fp16 = einsum(equation = var_937_equation_0, values = (var_643_cast_fp16, var_883_cast_fp16))[name = tensor<string, []>("op_937_cast_fp16")];
+            tensor<string, []> var_939_equation_0 = const()[name = tensor<string, []>("op_939_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_939_cast_fp16 = einsum(equation = var_939_equation_0, values = (var_643_cast_fp16, var_884_cast_fp16))[name = tensor<string, []>("op_939_cast_fp16")];
+            tensor<string, []> var_941_equation_0 = const()[name = tensor<string, []>("op_941_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_941_cast_fp16 = einsum(equation = var_941_equation_0, values = (var_643_cast_fp16, var_885_cast_fp16))[name = tensor<string, []>("op_941_cast_fp16")];
+            tensor<string, []> var_943_equation_0 = const()[name = tensor<string, []>("op_943_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_943_cast_fp16 = einsum(equation = var_943_equation_0, values = (var_647_cast_fp16, var_886_cast_fp16))[name = tensor<string, []>("op_943_cast_fp16")];
+            tensor<string, []> var_945_equation_0 = const()[name = tensor<string, []>("op_945_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_945_cast_fp16 = einsum(equation = var_945_equation_0, values = (var_647_cast_fp16, var_887_cast_fp16))[name = tensor<string, []>("op_945_cast_fp16")];
+            tensor<string, []> var_947_equation_0 = const()[name = tensor<string, []>("op_947_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_947_cast_fp16 = einsum(equation = var_947_equation_0, values = (var_647_cast_fp16, var_888_cast_fp16))[name = tensor<string, []>("op_947_cast_fp16")];
+            tensor<string, []> var_949_equation_0 = const()[name = tensor<string, []>("op_949_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_949_cast_fp16 = einsum(equation = var_949_equation_0, values = (var_647_cast_fp16, var_889_cast_fp16))[name = tensor<string, []>("op_949_cast_fp16")];
+            tensor<string, []> var_951_equation_0 = const()[name = tensor<string, []>("op_951_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_951_cast_fp16 = einsum(equation = var_951_equation_0, values = (var_651_cast_fp16, var_890_cast_fp16))[name = tensor<string, []>("op_951_cast_fp16")];
+            tensor<string, []> var_953_equation_0 = const()[name = tensor<string, []>("op_953_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_953_cast_fp16 = einsum(equation = var_953_equation_0, values = (var_651_cast_fp16, var_891_cast_fp16))[name = tensor<string, []>("op_953_cast_fp16")];
+            tensor<string, []> var_955_equation_0 = const()[name = tensor<string, []>("op_955_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_955_cast_fp16 = einsum(equation = var_955_equation_0, values = (var_651_cast_fp16, var_892_cast_fp16))[name = tensor<string, []>("op_955_cast_fp16")];
+            tensor<string, []> var_957_equation_0 = const()[name = tensor<string, []>("op_957_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_957_cast_fp16 = einsum(equation = var_957_equation_0, values = (var_651_cast_fp16, var_893_cast_fp16))[name = tensor<string, []>("op_957_cast_fp16")];
+            tensor<string, []> var_959_equation_0 = const()[name = tensor<string, []>("op_959_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_959_cast_fp16 = einsum(equation = var_959_equation_0, values = (var_655_cast_fp16, var_894_cast_fp16))[name = tensor<string, []>("op_959_cast_fp16")];
+            tensor<string, []> var_961_equation_0 = const()[name = tensor<string, []>("op_961_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_961_cast_fp16 = einsum(equation = var_961_equation_0, values = (var_655_cast_fp16, var_895_cast_fp16))[name = tensor<string, []>("op_961_cast_fp16")];
+            tensor<string, []> var_963_equation_0 = const()[name = tensor<string, []>("op_963_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_963_cast_fp16 = einsum(equation = var_963_equation_0, values = (var_655_cast_fp16, var_896_cast_fp16))[name = tensor<string, []>("op_963_cast_fp16")];
+            tensor<string, []> var_965_equation_0 = const()[name = tensor<string, []>("op_965_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_965_cast_fp16 = einsum(equation = var_965_equation_0, values = (var_655_cast_fp16, var_897_cast_fp16))[name = tensor<string, []>("op_965_cast_fp16")];
+            tensor<string, []> var_967_equation_0 = const()[name = tensor<string, []>("op_967_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_967_cast_fp16 = einsum(equation = var_967_equation_0, values = (var_659_cast_fp16, var_898_cast_fp16))[name = tensor<string, []>("op_967_cast_fp16")];
+            tensor<string, []> var_969_equation_0 = const()[name = tensor<string, []>("op_969_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_969_cast_fp16 = einsum(equation = var_969_equation_0, values = (var_659_cast_fp16, var_899_cast_fp16))[name = tensor<string, []>("op_969_cast_fp16")];
+            tensor<string, []> var_971_equation_0 = const()[name = tensor<string, []>("op_971_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_971_cast_fp16 = einsum(equation = var_971_equation_0, values = (var_659_cast_fp16, var_900_cast_fp16))[name = tensor<string, []>("op_971_cast_fp16")];
+            tensor<string, []> var_973_equation_0 = const()[name = tensor<string, []>("op_973_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_973_cast_fp16 = einsum(equation = var_973_equation_0, values = (var_659_cast_fp16, var_901_cast_fp16))[name = tensor<string, []>("op_973_cast_fp16")];
+            tensor<string, []> var_975_equation_0 = const()[name = tensor<string, []>("op_975_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_975_cast_fp16 = einsum(equation = var_975_equation_0, values = (var_663_cast_fp16, var_902_cast_fp16))[name = tensor<string, []>("op_975_cast_fp16")];
+            tensor<string, []> var_977_equation_0 = const()[name = tensor<string, []>("op_977_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_977_cast_fp16 = einsum(equation = var_977_equation_0, values = (var_663_cast_fp16, var_903_cast_fp16))[name = tensor<string, []>("op_977_cast_fp16")];
+            tensor<string, []> var_979_equation_0 = const()[name = tensor<string, []>("op_979_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_979_cast_fp16 = einsum(equation = var_979_equation_0, values = (var_663_cast_fp16, var_904_cast_fp16))[name = tensor<string, []>("op_979_cast_fp16")];
+            tensor<string, []> var_981_equation_0 = const()[name = tensor<string, []>("op_981_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_981_cast_fp16 = einsum(equation = var_981_equation_0, values = (var_663_cast_fp16, var_905_cast_fp16))[name = tensor<string, []>("op_981_cast_fp16")];
+            tensor<string, []> var_983_equation_0 = const()[name = tensor<string, []>("op_983_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_983_cast_fp16 = einsum(equation = var_983_equation_0, values = (var_667_cast_fp16, var_906_cast_fp16))[name = tensor<string, []>("op_983_cast_fp16")];
+            tensor<string, []> var_985_equation_0 = const()[name = tensor<string, []>("op_985_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_985_cast_fp16 = einsum(equation = var_985_equation_0, values = (var_667_cast_fp16, var_907_cast_fp16))[name = tensor<string, []>("op_985_cast_fp16")];
+            tensor<string, []> var_987_equation_0 = const()[name = tensor<string, []>("op_987_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_987_cast_fp16 = einsum(equation = var_987_equation_0, values = (var_667_cast_fp16, var_908_cast_fp16))[name = tensor<string, []>("op_987_cast_fp16")];
+            tensor<string, []> var_989_equation_0 = const()[name = tensor<string, []>("op_989_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_989_cast_fp16 = einsum(equation = var_989_equation_0, values = (var_667_cast_fp16, var_909_cast_fp16))[name = tensor<string, []>("op_989_cast_fp16")];
+            tensor<string, []> var_991_equation_0 = const()[name = tensor<string, []>("op_991_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_991_cast_fp16 = einsum(equation = var_991_equation_0, values = (var_671_cast_fp16, var_910_cast_fp16))[name = tensor<string, []>("op_991_cast_fp16")];
+            tensor<string, []> var_993_equation_0 = const()[name = tensor<string, []>("op_993_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_993_cast_fp16 = einsum(equation = var_993_equation_0, values = (var_671_cast_fp16, var_911_cast_fp16))[name = tensor<string, []>("op_993_cast_fp16")];
+            tensor<string, []> var_995_equation_0 = const()[name = tensor<string, []>("op_995_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_995_cast_fp16 = einsum(equation = var_995_equation_0, values = (var_671_cast_fp16, var_912_cast_fp16))[name = tensor<string, []>("op_995_cast_fp16")];
+            tensor<string, []> var_997_equation_0 = const()[name = tensor<string, []>("op_997_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_997_cast_fp16 = einsum(equation = var_997_equation_0, values = (var_671_cast_fp16, var_913_cast_fp16))[name = tensor<string, []>("op_997_cast_fp16")];
+            tensor<string, []> var_999_equation_0 = const()[name = tensor<string, []>("op_999_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_999_cast_fp16 = einsum(equation = var_999_equation_0, values = (var_675_cast_fp16, var_914_cast_fp16))[name = tensor<string, []>("op_999_cast_fp16")];
+            tensor<string, []> var_1001_equation_0 = const()[name = tensor<string, []>("op_1001_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1001_cast_fp16 = einsum(equation = var_1001_equation_0, values = (var_675_cast_fp16, var_915_cast_fp16))[name = tensor<string, []>("op_1001_cast_fp16")];
+            tensor<string, []> var_1003_equation_0 = const()[name = tensor<string, []>("op_1003_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1003_cast_fp16 = einsum(equation = var_1003_equation_0, values = (var_675_cast_fp16, var_916_cast_fp16))[name = tensor<string, []>("op_1003_cast_fp16")];
+            tensor<string, []> var_1005_equation_0 = const()[name = tensor<string, []>("op_1005_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1005_cast_fp16 = einsum(equation = var_1005_equation_0, values = (var_675_cast_fp16, var_917_cast_fp16))[name = tensor<string, []>("op_1005_cast_fp16")];
+            tensor<string, []> var_1007_equation_0 = const()[name = tensor<string, []>("op_1007_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1007_cast_fp16 = einsum(equation = var_1007_equation_0, values = (var_679_cast_fp16, var_918_cast_fp16))[name = tensor<string, []>("op_1007_cast_fp16")];
+            tensor<string, []> var_1009_equation_0 = const()[name = tensor<string, []>("op_1009_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1009_cast_fp16 = einsum(equation = var_1009_equation_0, values = (var_679_cast_fp16, var_919_cast_fp16))[name = tensor<string, []>("op_1009_cast_fp16")];
+            tensor<string, []> var_1011_equation_0 = const()[name = tensor<string, []>("op_1011_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1011_cast_fp16 = einsum(equation = var_1011_equation_0, values = (var_679_cast_fp16, var_920_cast_fp16))[name = tensor<string, []>("op_1011_cast_fp16")];
+            tensor<string, []> var_1013_equation_0 = const()[name = tensor<string, []>("op_1013_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1013_cast_fp16 = einsum(equation = var_1013_equation_0, values = (var_679_cast_fp16, var_921_cast_fp16))[name = tensor<string, []>("op_1013_cast_fp16")];
+            tensor<string, []> var_1015_equation_0 = const()[name = tensor<string, []>("op_1015_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1015_cast_fp16 = einsum(equation = var_1015_equation_0, values = (var_683_cast_fp16, var_922_cast_fp16))[name = tensor<string, []>("op_1015_cast_fp16")];
+            tensor<string, []> var_1017_equation_0 = const()[name = tensor<string, []>("op_1017_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1017_cast_fp16 = einsum(equation = var_1017_equation_0, values = (var_683_cast_fp16, var_923_cast_fp16))[name = tensor<string, []>("op_1017_cast_fp16")];
+            tensor<string, []> var_1019_equation_0 = const()[name = tensor<string, []>("op_1019_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1019_cast_fp16 = einsum(equation = var_1019_equation_0, values = (var_683_cast_fp16, var_924_cast_fp16))[name = tensor<string, []>("op_1019_cast_fp16")];
+            tensor<string, []> var_1021_equation_0 = const()[name = tensor<string, []>("op_1021_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1021_cast_fp16 = einsum(equation = var_1021_equation_0, values = (var_683_cast_fp16, var_925_cast_fp16))[name = tensor<string, []>("op_1021_cast_fp16")];
+            tensor<bool, []> var_1023_interleave_0 = const()[name = tensor<string, []>("op_1023_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1023_cast_fp16 = concat(axis = var_134, interleave = var_1023_interleave_0, values = (var_927_cast_fp16, var_929_cast_fp16, var_931_cast_fp16, var_933_cast_fp16))[name = tensor<string, []>("op_1023_cast_fp16")];
+            tensor<bool, []> var_1025_interleave_0 = const()[name = tensor<string, []>("op_1025_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1025_cast_fp16 = concat(axis = var_134, interleave = var_1025_interleave_0, values = (var_935_cast_fp16, var_937_cast_fp16, var_939_cast_fp16, var_941_cast_fp16))[name = tensor<string, []>("op_1025_cast_fp16")];
+            tensor<bool, []> var_1027_interleave_0 = const()[name = tensor<string, []>("op_1027_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1027_cast_fp16 = concat(axis = var_134, interleave = var_1027_interleave_0, values = (var_943_cast_fp16, var_945_cast_fp16, var_947_cast_fp16, var_949_cast_fp16))[name = tensor<string, []>("op_1027_cast_fp16")];
+            tensor<bool, []> var_1029_interleave_0 = const()[name = tensor<string, []>("op_1029_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1029_cast_fp16 = concat(axis = var_134, interleave = var_1029_interleave_0, values = (var_951_cast_fp16, var_953_cast_fp16, var_955_cast_fp16, var_957_cast_fp16))[name = tensor<string, []>("op_1029_cast_fp16")];
+            tensor<bool, []> var_1031_interleave_0 = const()[name = tensor<string, []>("op_1031_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1031_cast_fp16 = concat(axis = var_134, interleave = var_1031_interleave_0, values = (var_959_cast_fp16, var_961_cast_fp16, var_963_cast_fp16, var_965_cast_fp16))[name = tensor<string, []>("op_1031_cast_fp16")];
+            tensor<bool, []> var_1033_interleave_0 = const()[name = tensor<string, []>("op_1033_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1033_cast_fp16 = concat(axis = var_134, interleave = var_1033_interleave_0, values = (var_967_cast_fp16, var_969_cast_fp16, var_971_cast_fp16, var_973_cast_fp16))[name = tensor<string, []>("op_1033_cast_fp16")];
+            tensor<bool, []> var_1035_interleave_0 = const()[name = tensor<string, []>("op_1035_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1035_cast_fp16 = concat(axis = var_134, interleave = var_1035_interleave_0, values = (var_975_cast_fp16, var_977_cast_fp16, var_979_cast_fp16, var_981_cast_fp16))[name = tensor<string, []>("op_1035_cast_fp16")];
+            tensor<bool, []> var_1037_interleave_0 = const()[name = tensor<string, []>("op_1037_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1037_cast_fp16 = concat(axis = var_134, interleave = var_1037_interleave_0, values = (var_983_cast_fp16, var_985_cast_fp16, var_987_cast_fp16, var_989_cast_fp16))[name = tensor<string, []>("op_1037_cast_fp16")];
+            tensor<bool, []> var_1039_interleave_0 = const()[name = tensor<string, []>("op_1039_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1039_cast_fp16 = concat(axis = var_134, interleave = var_1039_interleave_0, values = (var_991_cast_fp16, var_993_cast_fp16, var_995_cast_fp16, var_997_cast_fp16))[name = tensor<string, []>("op_1039_cast_fp16")];
+            tensor<bool, []> var_1041_interleave_0 = const()[name = tensor<string, []>("op_1041_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1041_cast_fp16 = concat(axis = var_134, interleave = var_1041_interleave_0, values = (var_999_cast_fp16, var_1001_cast_fp16, var_1003_cast_fp16, var_1005_cast_fp16))[name = tensor<string, []>("op_1041_cast_fp16")];
+            tensor<bool, []> var_1043_interleave_0 = const()[name = tensor<string, []>("op_1043_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1043_cast_fp16 = concat(axis = var_134, interleave = var_1043_interleave_0, values = (var_1007_cast_fp16, var_1009_cast_fp16, var_1011_cast_fp16, var_1013_cast_fp16))[name = tensor<string, []>("op_1043_cast_fp16")];
+            tensor<bool, []> var_1045_interleave_0 = const()[name = tensor<string, []>("op_1045_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1045_cast_fp16 = concat(axis = var_134, interleave = var_1045_interleave_0, values = (var_1015_cast_fp16, var_1017_cast_fp16, var_1019_cast_fp16, var_1021_cast_fp16))[name = tensor<string, []>("op_1045_cast_fp16")];
+            tensor<bool, []> input_1_interleave_0 = const()[name = tensor<string, []>("input_1_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_1_cast_fp16 = concat(axis = var_151, interleave = input_1_interleave_0, values = (var_1023_cast_fp16, var_1025_cast_fp16, var_1027_cast_fp16, var_1029_cast_fp16, var_1031_cast_fp16, var_1033_cast_fp16, var_1035_cast_fp16, var_1037_cast_fp16, var_1039_cast_fp16, var_1041_cast_fp16, var_1043_cast_fp16, var_1045_cast_fp16))[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<int32, [2]> var_1050 = const()[name = tensor<string, []>("op_1050"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1052 = const()[name = tensor<string, []>("op_1052"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_3_pad_type_0 = const()[name = tensor<string, []>("obj_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_3_pad_0 = const()[name = tensor<string, []>("obj_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9763776)))];
+            tensor<fp16, [768]> layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10943488)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_1052, groups = var_151, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = var_1050, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("obj_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> var_1058 = const()[name = tensor<string, []>("op_1058"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_3_cast_fp16 = reduce_mean(axes = var_1058, keep_dims = var_152, x = inputs_3_cast_fp16)[name = tensor<string, []>("channels_mean_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor<string, []>("zero_mean_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor<string, []>("zero_mean_sq_3_cast_fp16")];
+            tensor<int32, [1]> var_1062 = const()[name = tensor<string, []>("op_1062"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1063_cast_fp16 = reduce_mean(axes = var_1062, keep_dims = var_152, x = zero_mean_sq_3_cast_fp16)[name = tensor<string, []>("op_1063_cast_fp16")];
+            tensor<fp16, []> var_1064_to_fp16 = const()[name = tensor<string, []>("op_1064_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_1065_cast_fp16 = add(x = var_1063_cast_fp16, y = var_1064_to_fp16)[name = tensor<string, []>("op_1065_cast_fp16")];
+            tensor<fp16, []> denom_3_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_3_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_1065_cast_fp16)[name = tensor<string, []>("denom_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
+            tensor<fp16, [768]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10945088)))];
+            tensor<fp16, [768]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10946688)))];
+            tensor<fp16, []> input_3_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_3_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<int32, [2]> var_1076 = const()[name = tensor<string, []>("op_1076"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1078 = const()[name = tensor<string, []>("op_1078"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_5_pad_type_0 = const()[name = tensor<string, []>("input_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_5_pad_0 = const()[name = tensor<string, []>("input_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10948288)))];
+            tensor<fp16, [3072]> layers_0_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15666944)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_1078, groups = var_151, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = var_1076, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> input_7_mode_0 = const()[name = tensor<string, []>("input_7_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<int32, [2]> var_1084 = const()[name = tensor<string, []>("op_1084"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1086 = const()[name = tensor<string, []>("op_1086"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15673152)))];
+            tensor<fp16, [768]> layers_0_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20391808)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_1086, groups = var_151, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_1084, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_1093 = const()[name = tensor<string, []>("op_1093"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1110 = const()[name = tensor<string, []>("op_1110"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1111 = const()[name = tensor<string, []>("op_1111"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_1121 = const()[name = tensor<string, []>("op_1121"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_5_cast_fp16 = reduce_mean(axes = var_1121, keep_dims = var_1111, x = inputs_5_cast_fp16)[name = tensor<string, []>("channels_mean_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = channels_mean_5_cast_fp16)[name = tensor<string, []>("zero_mean_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor<string, []>("zero_mean_sq_5_cast_fp16")];
+            tensor<int32, [1]> var_1125 = const()[name = tensor<string, []>("op_1125"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1126_cast_fp16 = reduce_mean(axes = var_1125, keep_dims = var_1111, x = zero_mean_sq_5_cast_fp16)[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<fp16, []> var_1127_to_fp16 = const()[name = tensor<string, []>("op_1127_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_1128_cast_fp16 = add(x = var_1126_cast_fp16, y = var_1127_to_fp16)[name = tensor<string, []>("op_1128_cast_fp16")];
+            tensor<fp16, []> denom_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_1128_cast_fp16)[name = tensor<string, []>("denom_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
+            tensor<fp16, [768]> obj_5_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20393408)))];
+            tensor<fp16, [768]> obj_5_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20395008)))];
+            tensor<fp16, []> obj_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor<string, []>("obj_5_cast_fp16")];
+            tensor<int32, [2]> var_1143 = const()[name = tensor<string, []>("op_1143"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1145 = const()[name = tensor<string, []>("op_1145"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_3_pad_type_0 = const()[name = tensor<string, []>("query_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_3_pad_0 = const()[name = tensor<string, []>("query_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20396608)))];
+            tensor<fp16, [768]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21576320)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_1145, groups = var_1110, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_1143, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
+            tensor<int32, [2]> var_1149 = const()[name = tensor<string, []>("op_1149"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1151 = const()[name = tensor<string, []>("op_1151"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_3_pad_type_0 = const()[name = tensor<string, []>("key_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_3_pad_0 = const()[name = tensor<string, []>("key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21577920)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_3_cast_fp16 = conv(dilations = var_1151, groups = var_1110, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_1149, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("key_3_cast_fp16")];
+            tensor<int32, [2]> var_1156 = const()[name = tensor<string, []>("op_1156"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1158 = const()[name = tensor<string, []>("op_1158"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_3_pad_type_0 = const()[name = tensor<string, []>("value_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_3_pad_0 = const()[name = tensor<string, []>("value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22757632)))];
+            tensor<fp16, [768]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23937344)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_1158, groups = var_1110, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_1156, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("value_3_cast_fp16")];
+            tensor<int32, [4]> var_1165_begin_0 = const()[name = tensor<string, []>("op_1165_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1165_end_0 = const()[name = tensor<string, []>("op_1165_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1165_end_mask_0 = const()[name = tensor<string, []>("op_1165_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1165_cast_fp16 = slice_by_index(begin = var_1165_begin_0, end = var_1165_end_0, end_mask = var_1165_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1165_cast_fp16")];
+            tensor<int32, [4]> var_1169_begin_0 = const()[name = tensor<string, []>("op_1169_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_1169_end_0 = const()[name = tensor<string, []>("op_1169_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_1169_end_mask_0 = const()[name = tensor<string, []>("op_1169_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1169_cast_fp16 = slice_by_index(begin = var_1169_begin_0, end = var_1169_end_0, end_mask = var_1169_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1169_cast_fp16")];
+            tensor<int32, [4]> var_1173_begin_0 = const()[name = tensor<string, []>("op_1173_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_1173_end_0 = const()[name = tensor<string, []>("op_1173_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_1173_end_mask_0 = const()[name = tensor<string, []>("op_1173_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1173_cast_fp16 = slice_by_index(begin = var_1173_begin_0, end = var_1173_end_0, end_mask = var_1173_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1173_cast_fp16")];
+            tensor<int32, [4]> var_1177_begin_0 = const()[name = tensor<string, []>("op_1177_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_1177_end_0 = const()[name = tensor<string, []>("op_1177_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_1177_end_mask_0 = const()[name = tensor<string, []>("op_1177_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1177_cast_fp16 = slice_by_index(begin = var_1177_begin_0, end = var_1177_end_0, end_mask = var_1177_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1177_cast_fp16")];
+            tensor<int32, [4]> var_1181_begin_0 = const()[name = tensor<string, []>("op_1181_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_1181_end_0 = const()[name = tensor<string, []>("op_1181_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_1181_end_mask_0 = const()[name = tensor<string, []>("op_1181_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1181_cast_fp16 = slice_by_index(begin = var_1181_begin_0, end = var_1181_end_0, end_mask = var_1181_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1181_cast_fp16")];
+            tensor<int32, [4]> var_1185_begin_0 = const()[name = tensor<string, []>("op_1185_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_1185_end_0 = const()[name = tensor<string, []>("op_1185_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_1185_end_mask_0 = const()[name = tensor<string, []>("op_1185_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1185_cast_fp16 = slice_by_index(begin = var_1185_begin_0, end = var_1185_end_0, end_mask = var_1185_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1185_cast_fp16")];
+            tensor<int32, [4]> var_1189_begin_0 = const()[name = tensor<string, []>("op_1189_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_1189_end_0 = const()[name = tensor<string, []>("op_1189_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_1189_end_mask_0 = const()[name = tensor<string, []>("op_1189_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1189_cast_fp16 = slice_by_index(begin = var_1189_begin_0, end = var_1189_end_0, end_mask = var_1189_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1189_cast_fp16")];
+            tensor<int32, [4]> var_1193_begin_0 = const()[name = tensor<string, []>("op_1193_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_1193_end_0 = const()[name = tensor<string, []>("op_1193_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_1193_end_mask_0 = const()[name = tensor<string, []>("op_1193_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1193_cast_fp16 = slice_by_index(begin = var_1193_begin_0, end = var_1193_end_0, end_mask = var_1193_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1193_cast_fp16")];
+            tensor<int32, [4]> var_1197_begin_0 = const()[name = tensor<string, []>("op_1197_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_1197_end_0 = const()[name = tensor<string, []>("op_1197_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_1197_end_mask_0 = const()[name = tensor<string, []>("op_1197_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1197_cast_fp16 = slice_by_index(begin = var_1197_begin_0, end = var_1197_end_0, end_mask = var_1197_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1197_cast_fp16")];
+            tensor<int32, [4]> var_1201_begin_0 = const()[name = tensor<string, []>("op_1201_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_1201_end_0 = const()[name = tensor<string, []>("op_1201_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_1201_end_mask_0 = const()[name = tensor<string, []>("op_1201_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1201_cast_fp16 = slice_by_index(begin = var_1201_begin_0, end = var_1201_end_0, end_mask = var_1201_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1201_cast_fp16")];
+            tensor<int32, [4]> var_1205_begin_0 = const()[name = tensor<string, []>("op_1205_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_1205_end_0 = const()[name = tensor<string, []>("op_1205_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_1205_end_mask_0 = const()[name = tensor<string, []>("op_1205_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1205_cast_fp16 = slice_by_index(begin = var_1205_begin_0, end = var_1205_end_0, end_mask = var_1205_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1205_cast_fp16")];
+            tensor<int32, [4]> var_1209_begin_0 = const()[name = tensor<string, []>("op_1209_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_1209_end_0 = const()[name = tensor<string, []>("op_1209_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_1209_end_mask_0 = const()[name = tensor<string, []>("op_1209_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1209_cast_fp16 = slice_by_index(begin = var_1209_begin_0, end = var_1209_end_0, end_mask = var_1209_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_1209_cast_fp16")];
+            tensor<int32, [4]> var_1218_begin_0 = const()[name = tensor<string, []>("op_1218_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1218_end_0 = const()[name = tensor<string, []>("op_1218_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1218_end_mask_0 = const()[name = tensor<string, []>("op_1218_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = var_1165_cast_fp16)[name = tensor<string, []>("op_1218_cast_fp16")];
+            tensor<int32, [4]> var_1225_begin_0 = const()[name = tensor<string, []>("op_1225_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1225_end_0 = const()[name = tensor<string, []>("op_1225_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1225_end_mask_0 = const()[name = tensor<string, []>("op_1225_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1225_cast_fp16 = slice_by_index(begin = var_1225_begin_0, end = var_1225_end_0, end_mask = var_1225_end_mask_0, x = var_1165_cast_fp16)[name = tensor<string, []>("op_1225_cast_fp16")];
+            tensor<int32, [4]> var_1232_begin_0 = const()[name = tensor<string, []>("op_1232_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1232_end_0 = const()[name = tensor<string, []>("op_1232_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1232_end_mask_0 = const()[name = tensor<string, []>("op_1232_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1232_cast_fp16 = slice_by_index(begin = var_1232_begin_0, end = var_1232_end_0, end_mask = var_1232_end_mask_0, x = var_1165_cast_fp16)[name = tensor<string, []>("op_1232_cast_fp16")];
+            tensor<int32, [4]> var_1239_begin_0 = const()[name = tensor<string, []>("op_1239_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1239_end_0 = const()[name = tensor<string, []>("op_1239_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1239_end_mask_0 = const()[name = tensor<string, []>("op_1239_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1239_cast_fp16 = slice_by_index(begin = var_1239_begin_0, end = var_1239_end_0, end_mask = var_1239_end_mask_0, x = var_1165_cast_fp16)[name = tensor<string, []>("op_1239_cast_fp16")];
+            tensor<int32, [4]> var_1246_begin_0 = const()[name = tensor<string, []>("op_1246_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1246_end_0 = const()[name = tensor<string, []>("op_1246_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1246_end_mask_0 = const()[name = tensor<string, []>("op_1246_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = var_1169_cast_fp16)[name = tensor<string, []>("op_1246_cast_fp16")];
+            tensor<int32, [4]> var_1253_begin_0 = const()[name = tensor<string, []>("op_1253_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1253_end_0 = const()[name = tensor<string, []>("op_1253_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1253_end_mask_0 = const()[name = tensor<string, []>("op_1253_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1253_cast_fp16 = slice_by_index(begin = var_1253_begin_0, end = var_1253_end_0, end_mask = var_1253_end_mask_0, x = var_1169_cast_fp16)[name = tensor<string, []>("op_1253_cast_fp16")];
+            tensor<int32, [4]> var_1260_begin_0 = const()[name = tensor<string, []>("op_1260_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1260_end_0 = const()[name = tensor<string, []>("op_1260_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1260_end_mask_0 = const()[name = tensor<string, []>("op_1260_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = var_1169_cast_fp16)[name = tensor<string, []>("op_1260_cast_fp16")];
+            tensor<int32, [4]> var_1267_begin_0 = const()[name = tensor<string, []>("op_1267_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1267_end_0 = const()[name = tensor<string, []>("op_1267_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1267_end_mask_0 = const()[name = tensor<string, []>("op_1267_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1267_cast_fp16 = slice_by_index(begin = var_1267_begin_0, end = var_1267_end_0, end_mask = var_1267_end_mask_0, x = var_1169_cast_fp16)[name = tensor<string, []>("op_1267_cast_fp16")];
+            tensor<int32, [4]> var_1274_begin_0 = const()[name = tensor<string, []>("op_1274_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1274_end_0 = const()[name = tensor<string, []>("op_1274_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1274_end_mask_0 = const()[name = tensor<string, []>("op_1274_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1274_cast_fp16 = slice_by_index(begin = var_1274_begin_0, end = var_1274_end_0, end_mask = var_1274_end_mask_0, x = var_1173_cast_fp16)[name = tensor<string, []>("op_1274_cast_fp16")];
+            tensor<int32, [4]> var_1281_begin_0 = const()[name = tensor<string, []>("op_1281_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1281_end_0 = const()[name = tensor<string, []>("op_1281_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1281_end_mask_0 = const()[name = tensor<string, []>("op_1281_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1281_cast_fp16 = slice_by_index(begin = var_1281_begin_0, end = var_1281_end_0, end_mask = var_1281_end_mask_0, x = var_1173_cast_fp16)[name = tensor<string, []>("op_1281_cast_fp16")];
+            tensor<int32, [4]> var_1288_begin_0 = const()[name = tensor<string, []>("op_1288_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1288_end_0 = const()[name = tensor<string, []>("op_1288_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1288_end_mask_0 = const()[name = tensor<string, []>("op_1288_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = var_1173_cast_fp16)[name = tensor<string, []>("op_1288_cast_fp16")];
+            tensor<int32, [4]> var_1295_begin_0 = const()[name = tensor<string, []>("op_1295_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1295_end_0 = const()[name = tensor<string, []>("op_1295_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1295_end_mask_0 = const()[name = tensor<string, []>("op_1295_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1295_cast_fp16 = slice_by_index(begin = var_1295_begin_0, end = var_1295_end_0, end_mask = var_1295_end_mask_0, x = var_1173_cast_fp16)[name = tensor<string, []>("op_1295_cast_fp16")];
+            tensor<int32, [4]> var_1302_begin_0 = const()[name = tensor<string, []>("op_1302_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1302_end_0 = const()[name = tensor<string, []>("op_1302_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1302_end_mask_0 = const()[name = tensor<string, []>("op_1302_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1302_cast_fp16 = slice_by_index(begin = var_1302_begin_0, end = var_1302_end_0, end_mask = var_1302_end_mask_0, x = var_1177_cast_fp16)[name = tensor<string, []>("op_1302_cast_fp16")];
+            tensor<int32, [4]> var_1309_begin_0 = const()[name = tensor<string, []>("op_1309_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1309_end_0 = const()[name = tensor<string, []>("op_1309_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1309_end_mask_0 = const()[name = tensor<string, []>("op_1309_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1309_cast_fp16 = slice_by_index(begin = var_1309_begin_0, end = var_1309_end_0, end_mask = var_1309_end_mask_0, x = var_1177_cast_fp16)[name = tensor<string, []>("op_1309_cast_fp16")];
+            tensor<int32, [4]> var_1316_begin_0 = const()[name = tensor<string, []>("op_1316_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1316_end_0 = const()[name = tensor<string, []>("op_1316_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1316_end_mask_0 = const()[name = tensor<string, []>("op_1316_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = var_1177_cast_fp16)[name = tensor<string, []>("op_1316_cast_fp16")];
+            tensor<int32, [4]> var_1323_begin_0 = const()[name = tensor<string, []>("op_1323_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1323_end_0 = const()[name = tensor<string, []>("op_1323_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1323_end_mask_0 = const()[name = tensor<string, []>("op_1323_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1323_cast_fp16 = slice_by_index(begin = var_1323_begin_0, end = var_1323_end_0, end_mask = var_1323_end_mask_0, x = var_1177_cast_fp16)[name = tensor<string, []>("op_1323_cast_fp16")];
+            tensor<int32, [4]> var_1330_begin_0 = const()[name = tensor<string, []>("op_1330_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1330_end_0 = const()[name = tensor<string, []>("op_1330_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1330_end_mask_0 = const()[name = tensor<string, []>("op_1330_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1330_cast_fp16 = slice_by_index(begin = var_1330_begin_0, end = var_1330_end_0, end_mask = var_1330_end_mask_0, x = var_1181_cast_fp16)[name = tensor<string, []>("op_1330_cast_fp16")];
+            tensor<int32, [4]> var_1337_begin_0 = const()[name = tensor<string, []>("op_1337_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1337_end_0 = const()[name = tensor<string, []>("op_1337_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1337_end_mask_0 = const()[name = tensor<string, []>("op_1337_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1337_cast_fp16 = slice_by_index(begin = var_1337_begin_0, end = var_1337_end_0, end_mask = var_1337_end_mask_0, x = var_1181_cast_fp16)[name = tensor<string, []>("op_1337_cast_fp16")];
+            tensor<int32, [4]> var_1344_begin_0 = const()[name = tensor<string, []>("op_1344_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1344_end_0 = const()[name = tensor<string, []>("op_1344_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1344_end_mask_0 = const()[name = tensor<string, []>("op_1344_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = var_1181_cast_fp16)[name = tensor<string, []>("op_1344_cast_fp16")];
+            tensor<int32, [4]> var_1351_begin_0 = const()[name = tensor<string, []>("op_1351_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1351_end_0 = const()[name = tensor<string, []>("op_1351_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1351_end_mask_0 = const()[name = tensor<string, []>("op_1351_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1351_cast_fp16 = slice_by_index(begin = var_1351_begin_0, end = var_1351_end_0, end_mask = var_1351_end_mask_0, x = var_1181_cast_fp16)[name = tensor<string, []>("op_1351_cast_fp16")];
+            tensor<int32, [4]> var_1358_begin_0 = const()[name = tensor<string, []>("op_1358_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1358_end_0 = const()[name = tensor<string, []>("op_1358_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1358_end_mask_0 = const()[name = tensor<string, []>("op_1358_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1358_cast_fp16 = slice_by_index(begin = var_1358_begin_0, end = var_1358_end_0, end_mask = var_1358_end_mask_0, x = var_1185_cast_fp16)[name = tensor<string, []>("op_1358_cast_fp16")];
+            tensor<int32, [4]> var_1365_begin_0 = const()[name = tensor<string, []>("op_1365_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1365_end_0 = const()[name = tensor<string, []>("op_1365_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1365_end_mask_0 = const()[name = tensor<string, []>("op_1365_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1365_cast_fp16 = slice_by_index(begin = var_1365_begin_0, end = var_1365_end_0, end_mask = var_1365_end_mask_0, x = var_1185_cast_fp16)[name = tensor<string, []>("op_1365_cast_fp16")];
+            tensor<int32, [4]> var_1372_begin_0 = const()[name = tensor<string, []>("op_1372_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1372_end_0 = const()[name = tensor<string, []>("op_1372_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1372_end_mask_0 = const()[name = tensor<string, []>("op_1372_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = var_1185_cast_fp16)[name = tensor<string, []>("op_1372_cast_fp16")];
+            tensor<int32, [4]> var_1379_begin_0 = const()[name = tensor<string, []>("op_1379_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1379_end_0 = const()[name = tensor<string, []>("op_1379_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1379_end_mask_0 = const()[name = tensor<string, []>("op_1379_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, x = var_1185_cast_fp16)[name = tensor<string, []>("op_1379_cast_fp16")];
+            tensor<int32, [4]> var_1386_begin_0 = const()[name = tensor<string, []>("op_1386_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1386_end_0 = const()[name = tensor<string, []>("op_1386_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1386_end_mask_0 = const()[name = tensor<string, []>("op_1386_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = var_1189_cast_fp16)[name = tensor<string, []>("op_1386_cast_fp16")];
+            tensor<int32, [4]> var_1393_begin_0 = const()[name = tensor<string, []>("op_1393_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1393_end_0 = const()[name = tensor<string, []>("op_1393_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1393_end_mask_0 = const()[name = tensor<string, []>("op_1393_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1393_cast_fp16 = slice_by_index(begin = var_1393_begin_0, end = var_1393_end_0, end_mask = var_1393_end_mask_0, x = var_1189_cast_fp16)[name = tensor<string, []>("op_1393_cast_fp16")];
+            tensor<int32, [4]> var_1400_begin_0 = const()[name = tensor<string, []>("op_1400_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1400_end_0 = const()[name = tensor<string, []>("op_1400_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1400_end_mask_0 = const()[name = tensor<string, []>("op_1400_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1400_cast_fp16 = slice_by_index(begin = var_1400_begin_0, end = var_1400_end_0, end_mask = var_1400_end_mask_0, x = var_1189_cast_fp16)[name = tensor<string, []>("op_1400_cast_fp16")];
+            tensor<int32, [4]> var_1407_begin_0 = const()[name = tensor<string, []>("op_1407_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1407_end_0 = const()[name = tensor<string, []>("op_1407_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1407_end_mask_0 = const()[name = tensor<string, []>("op_1407_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1407_cast_fp16 = slice_by_index(begin = var_1407_begin_0, end = var_1407_end_0, end_mask = var_1407_end_mask_0, x = var_1189_cast_fp16)[name = tensor<string, []>("op_1407_cast_fp16")];
+            tensor<int32, [4]> var_1414_begin_0 = const()[name = tensor<string, []>("op_1414_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1414_end_0 = const()[name = tensor<string, []>("op_1414_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1414_end_mask_0 = const()[name = tensor<string, []>("op_1414_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = var_1193_cast_fp16)[name = tensor<string, []>("op_1414_cast_fp16")];
+            tensor<int32, [4]> var_1421_begin_0 = const()[name = tensor<string, []>("op_1421_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1421_end_0 = const()[name = tensor<string, []>("op_1421_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1421_end_mask_0 = const()[name = tensor<string, []>("op_1421_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1421_cast_fp16 = slice_by_index(begin = var_1421_begin_0, end = var_1421_end_0, end_mask = var_1421_end_mask_0, x = var_1193_cast_fp16)[name = tensor<string, []>("op_1421_cast_fp16")];
+            tensor<int32, [4]> var_1428_begin_0 = const()[name = tensor<string, []>("op_1428_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1428_end_0 = const()[name = tensor<string, []>("op_1428_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1428_end_mask_0 = const()[name = tensor<string, []>("op_1428_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1428_cast_fp16 = slice_by_index(begin = var_1428_begin_0, end = var_1428_end_0, end_mask = var_1428_end_mask_0, x = var_1193_cast_fp16)[name = tensor<string, []>("op_1428_cast_fp16")];
+            tensor<int32, [4]> var_1435_begin_0 = const()[name = tensor<string, []>("op_1435_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1435_end_0 = const()[name = tensor<string, []>("op_1435_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1435_end_mask_0 = const()[name = tensor<string, []>("op_1435_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1435_cast_fp16 = slice_by_index(begin = var_1435_begin_0, end = var_1435_end_0, end_mask = var_1435_end_mask_0, x = var_1193_cast_fp16)[name = tensor<string, []>("op_1435_cast_fp16")];
+            tensor<int32, [4]> var_1442_begin_0 = const()[name = tensor<string, []>("op_1442_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1442_end_0 = const()[name = tensor<string, []>("op_1442_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1442_end_mask_0 = const()[name = tensor<string, []>("op_1442_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = var_1197_cast_fp16)[name = tensor<string, []>("op_1442_cast_fp16")];
+            tensor<int32, [4]> var_1449_begin_0 = const()[name = tensor<string, []>("op_1449_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1449_end_0 = const()[name = tensor<string, []>("op_1449_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1449_end_mask_0 = const()[name = tensor<string, []>("op_1449_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1449_cast_fp16 = slice_by_index(begin = var_1449_begin_0, end = var_1449_end_0, end_mask = var_1449_end_mask_0, x = var_1197_cast_fp16)[name = tensor<string, []>("op_1449_cast_fp16")];
+            tensor<int32, [4]> var_1456_begin_0 = const()[name = tensor<string, []>("op_1456_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1456_end_0 = const()[name = tensor<string, []>("op_1456_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1456_end_mask_0 = const()[name = tensor<string, []>("op_1456_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1456_cast_fp16 = slice_by_index(begin = var_1456_begin_0, end = var_1456_end_0, end_mask = var_1456_end_mask_0, x = var_1197_cast_fp16)[name = tensor<string, []>("op_1456_cast_fp16")];
+            tensor<int32, [4]> var_1463_begin_0 = const()[name = tensor<string, []>("op_1463_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1463_end_0 = const()[name = tensor<string, []>("op_1463_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1463_end_mask_0 = const()[name = tensor<string, []>("op_1463_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1463_cast_fp16 = slice_by_index(begin = var_1463_begin_0, end = var_1463_end_0, end_mask = var_1463_end_mask_0, x = var_1197_cast_fp16)[name = tensor<string, []>("op_1463_cast_fp16")];
+            tensor<int32, [4]> var_1470_begin_0 = const()[name = tensor<string, []>("op_1470_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1470_end_0 = const()[name = tensor<string, []>("op_1470_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1470_end_mask_0 = const()[name = tensor<string, []>("op_1470_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = var_1201_cast_fp16)[name = tensor<string, []>("op_1470_cast_fp16")];
+            tensor<int32, [4]> var_1477_begin_0 = const()[name = tensor<string, []>("op_1477_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1477_end_0 = const()[name = tensor<string, []>("op_1477_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1477_end_mask_0 = const()[name = tensor<string, []>("op_1477_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1477_cast_fp16 = slice_by_index(begin = var_1477_begin_0, end = var_1477_end_0, end_mask = var_1477_end_mask_0, x = var_1201_cast_fp16)[name = tensor<string, []>("op_1477_cast_fp16")];
+            tensor<int32, [4]> var_1484_begin_0 = const()[name = tensor<string, []>("op_1484_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1484_end_0 = const()[name = tensor<string, []>("op_1484_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1484_end_mask_0 = const()[name = tensor<string, []>("op_1484_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1484_cast_fp16 = slice_by_index(begin = var_1484_begin_0, end = var_1484_end_0, end_mask = var_1484_end_mask_0, x = var_1201_cast_fp16)[name = tensor<string, []>("op_1484_cast_fp16")];
+            tensor<int32, [4]> var_1491_begin_0 = const()[name = tensor<string, []>("op_1491_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1491_end_0 = const()[name = tensor<string, []>("op_1491_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1491_end_mask_0 = const()[name = tensor<string, []>("op_1491_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1491_cast_fp16 = slice_by_index(begin = var_1491_begin_0, end = var_1491_end_0, end_mask = var_1491_end_mask_0, x = var_1201_cast_fp16)[name = tensor<string, []>("op_1491_cast_fp16")];
+            tensor<int32, [4]> var_1498_begin_0 = const()[name = tensor<string, []>("op_1498_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1498_end_0 = const()[name = tensor<string, []>("op_1498_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1498_end_mask_0 = const()[name = tensor<string, []>("op_1498_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = var_1205_cast_fp16)[name = tensor<string, []>("op_1498_cast_fp16")];
+            tensor<int32, [4]> var_1505_begin_0 = const()[name = tensor<string, []>("op_1505_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1505_end_0 = const()[name = tensor<string, []>("op_1505_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1505_end_mask_0 = const()[name = tensor<string, []>("op_1505_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1505_cast_fp16 = slice_by_index(begin = var_1505_begin_0, end = var_1505_end_0, end_mask = var_1505_end_mask_0, x = var_1205_cast_fp16)[name = tensor<string, []>("op_1505_cast_fp16")];
+            tensor<int32, [4]> var_1512_begin_0 = const()[name = tensor<string, []>("op_1512_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1512_end_0 = const()[name = tensor<string, []>("op_1512_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1512_end_mask_0 = const()[name = tensor<string, []>("op_1512_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1512_cast_fp16 = slice_by_index(begin = var_1512_begin_0, end = var_1512_end_0, end_mask = var_1512_end_mask_0, x = var_1205_cast_fp16)[name = tensor<string, []>("op_1512_cast_fp16")];
+            tensor<int32, [4]> var_1519_begin_0 = const()[name = tensor<string, []>("op_1519_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1519_end_0 = const()[name = tensor<string, []>("op_1519_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1519_end_mask_0 = const()[name = tensor<string, []>("op_1519_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1519_cast_fp16 = slice_by_index(begin = var_1519_begin_0, end = var_1519_end_0, end_mask = var_1519_end_mask_0, x = var_1205_cast_fp16)[name = tensor<string, []>("op_1519_cast_fp16")];
+            tensor<int32, [4]> var_1526_begin_0 = const()[name = tensor<string, []>("op_1526_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1526_end_0 = const()[name = tensor<string, []>("op_1526_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1526_end_mask_0 = const()[name = tensor<string, []>("op_1526_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1526_cast_fp16 = slice_by_index(begin = var_1526_begin_0, end = var_1526_end_0, end_mask = var_1526_end_mask_0, x = var_1209_cast_fp16)[name = tensor<string, []>("op_1526_cast_fp16")];
+            tensor<int32, [4]> var_1533_begin_0 = const()[name = tensor<string, []>("op_1533_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1533_end_0 = const()[name = tensor<string, []>("op_1533_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1533_end_mask_0 = const()[name = tensor<string, []>("op_1533_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = var_1533_end_0, end_mask = var_1533_end_mask_0, x = var_1209_cast_fp16)[name = tensor<string, []>("op_1533_cast_fp16")];
+            tensor<int32, [4]> var_1540_begin_0 = const()[name = tensor<string, []>("op_1540_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1540_end_0 = const()[name = tensor<string, []>("op_1540_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1540_end_mask_0 = const()[name = tensor<string, []>("op_1540_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1540_cast_fp16 = slice_by_index(begin = var_1540_begin_0, end = var_1540_end_0, end_mask = var_1540_end_mask_0, x = var_1209_cast_fp16)[name = tensor<string, []>("op_1540_cast_fp16")];
+            tensor<int32, [4]> var_1547_begin_0 = const()[name = tensor<string, []>("op_1547_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1547_end_0 = const()[name = tensor<string, []>("op_1547_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1547_end_mask_0 = const()[name = tensor<string, []>("op_1547_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1547_cast_fp16 = slice_by_index(begin = var_1547_begin_0, end = var_1547_end_0, end_mask = var_1547_end_mask_0, x = var_1209_cast_fp16)[name = tensor<string, []>("op_1547_cast_fp16")];
+            tensor<int32, [4]> k_3_perm_0 = const()[name = tensor<string, []>("k_3_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_1552_begin_0 = const()[name = tensor<string, []>("op_1552_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1552_end_0 = const()[name = tensor<string, []>("op_1552_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_1552_end_mask_0 = const()[name = tensor<string, []>("op_1552_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_10 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1552_cast_fp16 = slice_by_index(begin = var_1552_begin_0, end = var_1552_end_0, end_mask = var_1552_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1552_cast_fp16")];
+            tensor<int32, [4]> var_1556_begin_0 = const()[name = tensor<string, []>("op_1556_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_1556_end_0 = const()[name = tensor<string, []>("op_1556_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_1556_end_mask_0 = const()[name = tensor<string, []>("op_1556_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1556_cast_fp16 = slice_by_index(begin = var_1556_begin_0, end = var_1556_end_0, end_mask = var_1556_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1556_cast_fp16")];
+            tensor<int32, [4]> var_1560_begin_0 = const()[name = tensor<string, []>("op_1560_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_1560_end_0 = const()[name = tensor<string, []>("op_1560_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_1560_end_mask_0 = const()[name = tensor<string, []>("op_1560_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1560_cast_fp16 = slice_by_index(begin = var_1560_begin_0, end = var_1560_end_0, end_mask = var_1560_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1560_cast_fp16")];
+            tensor<int32, [4]> var_1564_begin_0 = const()[name = tensor<string, []>("op_1564_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_1564_end_0 = const()[name = tensor<string, []>("op_1564_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_1564_end_mask_0 = const()[name = tensor<string, []>("op_1564_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1564_cast_fp16 = slice_by_index(begin = var_1564_begin_0, end = var_1564_end_0, end_mask = var_1564_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1564_cast_fp16")];
+            tensor<int32, [4]> var_1568_begin_0 = const()[name = tensor<string, []>("op_1568_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_1568_end_0 = const()[name = tensor<string, []>("op_1568_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_1568_end_mask_0 = const()[name = tensor<string, []>("op_1568_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1568_cast_fp16 = slice_by_index(begin = var_1568_begin_0, end = var_1568_end_0, end_mask = var_1568_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1568_cast_fp16")];
+            tensor<int32, [4]> var_1572_begin_0 = const()[name = tensor<string, []>("op_1572_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_1572_end_0 = const()[name = tensor<string, []>("op_1572_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_1572_end_mask_0 = const()[name = tensor<string, []>("op_1572_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1572_cast_fp16 = slice_by_index(begin = var_1572_begin_0, end = var_1572_end_0, end_mask = var_1572_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1572_cast_fp16")];
+            tensor<int32, [4]> var_1576_begin_0 = const()[name = tensor<string, []>("op_1576_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_1576_end_0 = const()[name = tensor<string, []>("op_1576_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_1576_end_mask_0 = const()[name = tensor<string, []>("op_1576_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1576_cast_fp16 = slice_by_index(begin = var_1576_begin_0, end = var_1576_end_0, end_mask = var_1576_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1576_cast_fp16")];
+            tensor<int32, [4]> var_1580_begin_0 = const()[name = tensor<string, []>("op_1580_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_1580_end_0 = const()[name = tensor<string, []>("op_1580_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_1580_end_mask_0 = const()[name = tensor<string, []>("op_1580_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1580_cast_fp16 = slice_by_index(begin = var_1580_begin_0, end = var_1580_end_0, end_mask = var_1580_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1580_cast_fp16")];
+            tensor<int32, [4]> var_1584_begin_0 = const()[name = tensor<string, []>("op_1584_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_1584_end_0 = const()[name = tensor<string, []>("op_1584_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_1584_end_mask_0 = const()[name = tensor<string, []>("op_1584_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1584_cast_fp16 = slice_by_index(begin = var_1584_begin_0, end = var_1584_end_0, end_mask = var_1584_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1584_cast_fp16")];
+            tensor<int32, [4]> var_1588_begin_0 = const()[name = tensor<string, []>("op_1588_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_1588_end_0 = const()[name = tensor<string, []>("op_1588_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_1588_end_mask_0 = const()[name = tensor<string, []>("op_1588_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1588_cast_fp16 = slice_by_index(begin = var_1588_begin_0, end = var_1588_end_0, end_mask = var_1588_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1588_cast_fp16")];
+            tensor<int32, [4]> var_1592_begin_0 = const()[name = tensor<string, []>("op_1592_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_1592_end_0 = const()[name = tensor<string, []>("op_1592_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_1592_end_mask_0 = const()[name = tensor<string, []>("op_1592_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1592_cast_fp16 = slice_by_index(begin = var_1592_begin_0, end = var_1592_end_0, end_mask = var_1592_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1592_cast_fp16")];
+            tensor<int32, [4]> var_1596_begin_0 = const()[name = tensor<string, []>("op_1596_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_1596_end_0 = const()[name = tensor<string, []>("op_1596_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_1596_end_mask_0 = const()[name = tensor<string, []>("op_1596_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1596_cast_fp16 = slice_by_index(begin = var_1596_begin_0, end = var_1596_end_0, end_mask = var_1596_end_mask_0, x = transpose_10)[name = tensor<string, []>("op_1596_cast_fp16")];
+            tensor<int32, [4]> var_1598_begin_0 = const()[name = tensor<string, []>("op_1598_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1598_end_0 = const()[name = tensor<string, []>("op_1598_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1598_end_mask_0 = const()[name = tensor<string, []>("op_1598_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16 = slice_by_index(begin = var_1598_begin_0, end = var_1598_end_0, end_mask = var_1598_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1598_cast_fp16")];
+            tensor<int32, [4]> var_1602_begin_0 = const()[name = tensor<string, []>("op_1602_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_1602_end_0 = const()[name = tensor<string, []>("op_1602_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_1602_end_mask_0 = const()[name = tensor<string, []>("op_1602_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1602_cast_fp16 = slice_by_index(begin = var_1602_begin_0, end = var_1602_end_0, end_mask = var_1602_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1602_cast_fp16")];
+            tensor<int32, [4]> var_1606_begin_0 = const()[name = tensor<string, []>("op_1606_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_1606_end_0 = const()[name = tensor<string, []>("op_1606_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_1606_end_mask_0 = const()[name = tensor<string, []>("op_1606_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1606_cast_fp16 = slice_by_index(begin = var_1606_begin_0, end = var_1606_end_0, end_mask = var_1606_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1606_cast_fp16")];
+            tensor<int32, [4]> var_1610_begin_0 = const()[name = tensor<string, []>("op_1610_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_1610_end_0 = const()[name = tensor<string, []>("op_1610_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_1610_end_mask_0 = const()[name = tensor<string, []>("op_1610_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1610_cast_fp16 = slice_by_index(begin = var_1610_begin_0, end = var_1610_end_0, end_mask = var_1610_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1610_cast_fp16")];
+            tensor<int32, [4]> var_1614_begin_0 = const()[name = tensor<string, []>("op_1614_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_1614_end_0 = const()[name = tensor<string, []>("op_1614_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_1614_end_mask_0 = const()[name = tensor<string, []>("op_1614_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1614_cast_fp16 = slice_by_index(begin = var_1614_begin_0, end = var_1614_end_0, end_mask = var_1614_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1614_cast_fp16")];
+            tensor<int32, [4]> var_1618_begin_0 = const()[name = tensor<string, []>("op_1618_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_1618_end_0 = const()[name = tensor<string, []>("op_1618_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_1618_end_mask_0 = const()[name = tensor<string, []>("op_1618_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1618_cast_fp16 = slice_by_index(begin = var_1618_begin_0, end = var_1618_end_0, end_mask = var_1618_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1618_cast_fp16")];
+            tensor<int32, [4]> var_1622_begin_0 = const()[name = tensor<string, []>("op_1622_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_1622_end_0 = const()[name = tensor<string, []>("op_1622_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_1622_end_mask_0 = const()[name = tensor<string, []>("op_1622_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1622_cast_fp16 = slice_by_index(begin = var_1622_begin_0, end = var_1622_end_0, end_mask = var_1622_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1622_cast_fp16")];
+            tensor<int32, [4]> var_1626_begin_0 = const()[name = tensor<string, []>("op_1626_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_1626_end_0 = const()[name = tensor<string, []>("op_1626_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_1626_end_mask_0 = const()[name = tensor<string, []>("op_1626_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1626_cast_fp16 = slice_by_index(begin = var_1626_begin_0, end = var_1626_end_0, end_mask = var_1626_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1626_cast_fp16")];
+            tensor<int32, [4]> var_1630_begin_0 = const()[name = tensor<string, []>("op_1630_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_1630_end_0 = const()[name = tensor<string, []>("op_1630_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_1630_end_mask_0 = const()[name = tensor<string, []>("op_1630_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1630_cast_fp16 = slice_by_index(begin = var_1630_begin_0, end = var_1630_end_0, end_mask = var_1630_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1630_cast_fp16")];
+            tensor<int32, [4]> var_1634_begin_0 = const()[name = tensor<string, []>("op_1634_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_1634_end_0 = const()[name = tensor<string, []>("op_1634_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_1634_end_mask_0 = const()[name = tensor<string, []>("op_1634_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1634_cast_fp16 = slice_by_index(begin = var_1634_begin_0, end = var_1634_end_0, end_mask = var_1634_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1634_cast_fp16")];
+            tensor<int32, [4]> var_1638_begin_0 = const()[name = tensor<string, []>("op_1638_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_1638_end_0 = const()[name = tensor<string, []>("op_1638_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_1638_end_mask_0 = const()[name = tensor<string, []>("op_1638_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1638_cast_fp16 = slice_by_index(begin = var_1638_begin_0, end = var_1638_end_0, end_mask = var_1638_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1638_cast_fp16")];
+            tensor<int32, [4]> var_1642_begin_0 = const()[name = tensor<string, []>("op_1642_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_1642_end_0 = const()[name = tensor<string, []>("op_1642_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_1642_end_mask_0 = const()[name = tensor<string, []>("op_1642_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1642_cast_fp16 = slice_by_index(begin = var_1642_begin_0, end = var_1642_end_0, end_mask = var_1642_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_1642_cast_fp16")];
+            tensor<string, []> var_1646_equation_0 = const()[name = tensor<string, []>("op_1646_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1646_cast_fp16 = einsum(equation = var_1646_equation_0, values = (var_1552_cast_fp16, var_1218_cast_fp16))[name = tensor<string, []>("op_1646_cast_fp16")];
+            tensor<fp16, []> var_1647_to_fp16 = const()[name = tensor<string, []>("op_1647_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_97_cast_fp16 = mul(x = var_1646_cast_fp16, y = var_1647_to_fp16)[name = tensor<string, []>("aw_chunk_97_cast_fp16")];
+            tensor<string, []> var_1650_equation_0 = const()[name = tensor<string, []>("op_1650_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1552_cast_fp16, var_1225_cast_fp16))[name = tensor<string, []>("op_1650_cast_fp16")];
+            tensor<fp16, []> var_1651_to_fp16 = const()[name = tensor<string, []>("op_1651_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_99_cast_fp16 = mul(x = var_1650_cast_fp16, y = var_1651_to_fp16)[name = tensor<string, []>("aw_chunk_99_cast_fp16")];
+            tensor<string, []> var_1654_equation_0 = const()[name = tensor<string, []>("op_1654_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1552_cast_fp16, var_1232_cast_fp16))[name = tensor<string, []>("op_1654_cast_fp16")];
+            tensor<fp16, []> var_1655_to_fp16 = const()[name = tensor<string, []>("op_1655_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_101_cast_fp16 = mul(x = var_1654_cast_fp16, y = var_1655_to_fp16)[name = tensor<string, []>("aw_chunk_101_cast_fp16")];
+            tensor<string, []> var_1658_equation_0 = const()[name = tensor<string, []>("op_1658_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1552_cast_fp16, var_1239_cast_fp16))[name = tensor<string, []>("op_1658_cast_fp16")];
+            tensor<fp16, []> var_1659_to_fp16 = const()[name = tensor<string, []>("op_1659_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_103_cast_fp16 = mul(x = var_1658_cast_fp16, y = var_1659_to_fp16)[name = tensor<string, []>("aw_chunk_103_cast_fp16")];
+            tensor<string, []> var_1662_equation_0 = const()[name = tensor<string, []>("op_1662_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1556_cast_fp16, var_1246_cast_fp16))[name = tensor<string, []>("op_1662_cast_fp16")];
+            tensor<fp16, []> var_1663_to_fp16 = const()[name = tensor<string, []>("op_1663_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_105_cast_fp16 = mul(x = var_1662_cast_fp16, y = var_1663_to_fp16)[name = tensor<string, []>("aw_chunk_105_cast_fp16")];
+            tensor<string, []> var_1666_equation_0 = const()[name = tensor<string, []>("op_1666_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1556_cast_fp16, var_1253_cast_fp16))[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<fp16, []> var_1667_to_fp16 = const()[name = tensor<string, []>("op_1667_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_107_cast_fp16 = mul(x = var_1666_cast_fp16, y = var_1667_to_fp16)[name = tensor<string, []>("aw_chunk_107_cast_fp16")];
+            tensor<string, []> var_1670_equation_0 = const()[name = tensor<string, []>("op_1670_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1556_cast_fp16, var_1260_cast_fp16))[name = tensor<string, []>("op_1670_cast_fp16")];
+            tensor<fp16, []> var_1671_to_fp16 = const()[name = tensor<string, []>("op_1671_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_109_cast_fp16 = mul(x = var_1670_cast_fp16, y = var_1671_to_fp16)[name = tensor<string, []>("aw_chunk_109_cast_fp16")];
+            tensor<string, []> var_1674_equation_0 = const()[name = tensor<string, []>("op_1674_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1674_cast_fp16 = einsum(equation = var_1674_equation_0, values = (var_1556_cast_fp16, var_1267_cast_fp16))[name = tensor<string, []>("op_1674_cast_fp16")];
+            tensor<fp16, []> var_1675_to_fp16 = const()[name = tensor<string, []>("op_1675_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_111_cast_fp16 = mul(x = var_1674_cast_fp16, y = var_1675_to_fp16)[name = tensor<string, []>("aw_chunk_111_cast_fp16")];
+            tensor<string, []> var_1678_equation_0 = const()[name = tensor<string, []>("op_1678_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1678_cast_fp16 = einsum(equation = var_1678_equation_0, values = (var_1560_cast_fp16, var_1274_cast_fp16))[name = tensor<string, []>("op_1678_cast_fp16")];
+            tensor<fp16, []> var_1679_to_fp16 = const()[name = tensor<string, []>("op_1679_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_113_cast_fp16 = mul(x = var_1678_cast_fp16, y = var_1679_to_fp16)[name = tensor<string, []>("aw_chunk_113_cast_fp16")];
+            tensor<string, []> var_1682_equation_0 = const()[name = tensor<string, []>("op_1682_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1682_cast_fp16 = einsum(equation = var_1682_equation_0, values = (var_1560_cast_fp16, var_1281_cast_fp16))[name = tensor<string, []>("op_1682_cast_fp16")];
+            tensor<fp16, []> var_1683_to_fp16 = const()[name = tensor<string, []>("op_1683_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_115_cast_fp16 = mul(x = var_1682_cast_fp16, y = var_1683_to_fp16)[name = tensor<string, []>("aw_chunk_115_cast_fp16")];
+            tensor<string, []> var_1686_equation_0 = const()[name = tensor<string, []>("op_1686_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1686_cast_fp16 = einsum(equation = var_1686_equation_0, values = (var_1560_cast_fp16, var_1288_cast_fp16))[name = tensor<string, []>("op_1686_cast_fp16")];
+            tensor<fp16, []> var_1687_to_fp16 = const()[name = tensor<string, []>("op_1687_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_117_cast_fp16 = mul(x = var_1686_cast_fp16, y = var_1687_to_fp16)[name = tensor<string, []>("aw_chunk_117_cast_fp16")];
+            tensor<string, []> var_1690_equation_0 = const()[name = tensor<string, []>("op_1690_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1690_cast_fp16 = einsum(equation = var_1690_equation_0, values = (var_1560_cast_fp16, var_1295_cast_fp16))[name = tensor<string, []>("op_1690_cast_fp16")];
+            tensor<fp16, []> var_1691_to_fp16 = const()[name = tensor<string, []>("op_1691_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_119_cast_fp16 = mul(x = var_1690_cast_fp16, y = var_1691_to_fp16)[name = tensor<string, []>("aw_chunk_119_cast_fp16")];
+            tensor<string, []> var_1694_equation_0 = const()[name = tensor<string, []>("op_1694_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1694_cast_fp16 = einsum(equation = var_1694_equation_0, values = (var_1564_cast_fp16, var_1302_cast_fp16))[name = tensor<string, []>("op_1694_cast_fp16")];
+            tensor<fp16, []> var_1695_to_fp16 = const()[name = tensor<string, []>("op_1695_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_121_cast_fp16 = mul(x = var_1694_cast_fp16, y = var_1695_to_fp16)[name = tensor<string, []>("aw_chunk_121_cast_fp16")];
+            tensor<string, []> var_1698_equation_0 = const()[name = tensor<string, []>("op_1698_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1564_cast_fp16, var_1309_cast_fp16))[name = tensor<string, []>("op_1698_cast_fp16")];
+            tensor<fp16, []> var_1699_to_fp16 = const()[name = tensor<string, []>("op_1699_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_123_cast_fp16 = mul(x = var_1698_cast_fp16, y = var_1699_to_fp16)[name = tensor<string, []>("aw_chunk_123_cast_fp16")];
+            tensor<string, []> var_1702_equation_0 = const()[name = tensor<string, []>("op_1702_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1564_cast_fp16, var_1316_cast_fp16))[name = tensor<string, []>("op_1702_cast_fp16")];
+            tensor<fp16, []> var_1703_to_fp16 = const()[name = tensor<string, []>("op_1703_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_125_cast_fp16 = mul(x = var_1702_cast_fp16, y = var_1703_to_fp16)[name = tensor<string, []>("aw_chunk_125_cast_fp16")];
+            tensor<string, []> var_1706_equation_0 = const()[name = tensor<string, []>("op_1706_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1564_cast_fp16, var_1323_cast_fp16))[name = tensor<string, []>("op_1706_cast_fp16")];
+            tensor<fp16, []> var_1707_to_fp16 = const()[name = tensor<string, []>("op_1707_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_127_cast_fp16 = mul(x = var_1706_cast_fp16, y = var_1707_to_fp16)[name = tensor<string, []>("aw_chunk_127_cast_fp16")];
+            tensor<string, []> var_1710_equation_0 = const()[name = tensor<string, []>("op_1710_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1568_cast_fp16, var_1330_cast_fp16))[name = tensor<string, []>("op_1710_cast_fp16")];
+            tensor<fp16, []> var_1711_to_fp16 = const()[name = tensor<string, []>("op_1711_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_129_cast_fp16 = mul(x = var_1710_cast_fp16, y = var_1711_to_fp16)[name = tensor<string, []>("aw_chunk_129_cast_fp16")];
+            tensor<string, []> var_1714_equation_0 = const()[name = tensor<string, []>("op_1714_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1568_cast_fp16, var_1337_cast_fp16))[name = tensor<string, []>("op_1714_cast_fp16")];
+            tensor<fp16, []> var_1715_to_fp16 = const()[name = tensor<string, []>("op_1715_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_131_cast_fp16 = mul(x = var_1714_cast_fp16, y = var_1715_to_fp16)[name = tensor<string, []>("aw_chunk_131_cast_fp16")];
+            tensor<string, []> var_1718_equation_0 = const()[name = tensor<string, []>("op_1718_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1568_cast_fp16, var_1344_cast_fp16))[name = tensor<string, []>("op_1718_cast_fp16")];
+            tensor<fp16, []> var_1719_to_fp16 = const()[name = tensor<string, []>("op_1719_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_133_cast_fp16 = mul(x = var_1718_cast_fp16, y = var_1719_to_fp16)[name = tensor<string, []>("aw_chunk_133_cast_fp16")];
+            tensor<string, []> var_1722_equation_0 = const()[name = tensor<string, []>("op_1722_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1568_cast_fp16, var_1351_cast_fp16))[name = tensor<string, []>("op_1722_cast_fp16")];
+            tensor<fp16, []> var_1723_to_fp16 = const()[name = tensor<string, []>("op_1723_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_135_cast_fp16 = mul(x = var_1722_cast_fp16, y = var_1723_to_fp16)[name = tensor<string, []>("aw_chunk_135_cast_fp16")];
+            tensor<string, []> var_1726_equation_0 = const()[name = tensor<string, []>("op_1726_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1572_cast_fp16, var_1358_cast_fp16))[name = tensor<string, []>("op_1726_cast_fp16")];
+            tensor<fp16, []> var_1727_to_fp16 = const()[name = tensor<string, []>("op_1727_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_137_cast_fp16 = mul(x = var_1726_cast_fp16, y = var_1727_to_fp16)[name = tensor<string, []>("aw_chunk_137_cast_fp16")];
+            tensor<string, []> var_1730_equation_0 = const()[name = tensor<string, []>("op_1730_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1572_cast_fp16, var_1365_cast_fp16))[name = tensor<string, []>("op_1730_cast_fp16")];
+            tensor<fp16, []> var_1731_to_fp16 = const()[name = tensor<string, []>("op_1731_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_139_cast_fp16 = mul(x = var_1730_cast_fp16, y = var_1731_to_fp16)[name = tensor<string, []>("aw_chunk_139_cast_fp16")];
+            tensor<string, []> var_1734_equation_0 = const()[name = tensor<string, []>("op_1734_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1572_cast_fp16, var_1372_cast_fp16))[name = tensor<string, []>("op_1734_cast_fp16")];
+            tensor<fp16, []> var_1735_to_fp16 = const()[name = tensor<string, []>("op_1735_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_141_cast_fp16 = mul(x = var_1734_cast_fp16, y = var_1735_to_fp16)[name = tensor<string, []>("aw_chunk_141_cast_fp16")];
+            tensor<string, []> var_1738_equation_0 = const()[name = tensor<string, []>("op_1738_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1572_cast_fp16, var_1379_cast_fp16))[name = tensor<string, []>("op_1738_cast_fp16")];
+            tensor<fp16, []> var_1739_to_fp16 = const()[name = tensor<string, []>("op_1739_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_143_cast_fp16 = mul(x = var_1738_cast_fp16, y = var_1739_to_fp16)[name = tensor<string, []>("aw_chunk_143_cast_fp16")];
+            tensor<string, []> var_1742_equation_0 = const()[name = tensor<string, []>("op_1742_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1576_cast_fp16, var_1386_cast_fp16))[name = tensor<string, []>("op_1742_cast_fp16")];
+            tensor<fp16, []> var_1743_to_fp16 = const()[name = tensor<string, []>("op_1743_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_145_cast_fp16 = mul(x = var_1742_cast_fp16, y = var_1743_to_fp16)[name = tensor<string, []>("aw_chunk_145_cast_fp16")];
+            tensor<string, []> var_1746_equation_0 = const()[name = tensor<string, []>("op_1746_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1576_cast_fp16, var_1393_cast_fp16))[name = tensor<string, []>("op_1746_cast_fp16")];
+            tensor<fp16, []> var_1747_to_fp16 = const()[name = tensor<string, []>("op_1747_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_147_cast_fp16 = mul(x = var_1746_cast_fp16, y = var_1747_to_fp16)[name = tensor<string, []>("aw_chunk_147_cast_fp16")];
+            tensor<string, []> var_1750_equation_0 = const()[name = tensor<string, []>("op_1750_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1576_cast_fp16, var_1400_cast_fp16))[name = tensor<string, []>("op_1750_cast_fp16")];
+            tensor<fp16, []> var_1751_to_fp16 = const()[name = tensor<string, []>("op_1751_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_149_cast_fp16 = mul(x = var_1750_cast_fp16, y = var_1751_to_fp16)[name = tensor<string, []>("aw_chunk_149_cast_fp16")];
+            tensor<string, []> var_1754_equation_0 = const()[name = tensor<string, []>("op_1754_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1576_cast_fp16, var_1407_cast_fp16))[name = tensor<string, []>("op_1754_cast_fp16")];
+            tensor<fp16, []> var_1755_to_fp16 = const()[name = tensor<string, []>("op_1755_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_151_cast_fp16 = mul(x = var_1754_cast_fp16, y = var_1755_to_fp16)[name = tensor<string, []>("aw_chunk_151_cast_fp16")];
+            tensor<string, []> var_1758_equation_0 = const()[name = tensor<string, []>("op_1758_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1580_cast_fp16, var_1414_cast_fp16))[name = tensor<string, []>("op_1758_cast_fp16")];
+            tensor<fp16, []> var_1759_to_fp16 = const()[name = tensor<string, []>("op_1759_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_153_cast_fp16 = mul(x = var_1758_cast_fp16, y = var_1759_to_fp16)[name = tensor<string, []>("aw_chunk_153_cast_fp16")];
+            tensor<string, []> var_1762_equation_0 = const()[name = tensor<string, []>("op_1762_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1762_cast_fp16 = einsum(equation = var_1762_equation_0, values = (var_1580_cast_fp16, var_1421_cast_fp16))[name = tensor<string, []>("op_1762_cast_fp16")];
+            tensor<fp16, []> var_1763_to_fp16 = const()[name = tensor<string, []>("op_1763_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_155_cast_fp16 = mul(x = var_1762_cast_fp16, y = var_1763_to_fp16)[name = tensor<string, []>("aw_chunk_155_cast_fp16")];
+            tensor<string, []> var_1766_equation_0 = const()[name = tensor<string, []>("op_1766_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1766_cast_fp16 = einsum(equation = var_1766_equation_0, values = (var_1580_cast_fp16, var_1428_cast_fp16))[name = tensor<string, []>("op_1766_cast_fp16")];
+            tensor<fp16, []> var_1767_to_fp16 = const()[name = tensor<string, []>("op_1767_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_157_cast_fp16 = mul(x = var_1766_cast_fp16, y = var_1767_to_fp16)[name = tensor<string, []>("aw_chunk_157_cast_fp16")];
+            tensor<string, []> var_1770_equation_0 = const()[name = tensor<string, []>("op_1770_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1770_cast_fp16 = einsum(equation = var_1770_equation_0, values = (var_1580_cast_fp16, var_1435_cast_fp16))[name = tensor<string, []>("op_1770_cast_fp16")];
+            tensor<fp16, []> var_1771_to_fp16 = const()[name = tensor<string, []>("op_1771_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_159_cast_fp16 = mul(x = var_1770_cast_fp16, y = var_1771_to_fp16)[name = tensor<string, []>("aw_chunk_159_cast_fp16")];
+            tensor<string, []> var_1774_equation_0 = const()[name = tensor<string, []>("op_1774_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1774_cast_fp16 = einsum(equation = var_1774_equation_0, values = (var_1584_cast_fp16, var_1442_cast_fp16))[name = tensor<string, []>("op_1774_cast_fp16")];
+            tensor<fp16, []> var_1775_to_fp16 = const()[name = tensor<string, []>("op_1775_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_161_cast_fp16 = mul(x = var_1774_cast_fp16, y = var_1775_to_fp16)[name = tensor<string, []>("aw_chunk_161_cast_fp16")];
+            tensor<string, []> var_1778_equation_0 = const()[name = tensor<string, []>("op_1778_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1778_cast_fp16 = einsum(equation = var_1778_equation_0, values = (var_1584_cast_fp16, var_1449_cast_fp16))[name = tensor<string, []>("op_1778_cast_fp16")];
+            tensor<fp16, []> var_1779_to_fp16 = const()[name = tensor<string, []>("op_1779_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_163_cast_fp16 = mul(x = var_1778_cast_fp16, y = var_1779_to_fp16)[name = tensor<string, []>("aw_chunk_163_cast_fp16")];
+            tensor<string, []> var_1782_equation_0 = const()[name = tensor<string, []>("op_1782_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1782_cast_fp16 = einsum(equation = var_1782_equation_0, values = (var_1584_cast_fp16, var_1456_cast_fp16))[name = tensor<string, []>("op_1782_cast_fp16")];
+            tensor<fp16, []> var_1783_to_fp16 = const()[name = tensor<string, []>("op_1783_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_165_cast_fp16 = mul(x = var_1782_cast_fp16, y = var_1783_to_fp16)[name = tensor<string, []>("aw_chunk_165_cast_fp16")];
+            tensor<string, []> var_1786_equation_0 = const()[name = tensor<string, []>("op_1786_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1786_cast_fp16 = einsum(equation = var_1786_equation_0, values = (var_1584_cast_fp16, var_1463_cast_fp16))[name = tensor<string, []>("op_1786_cast_fp16")];
+            tensor<fp16, []> var_1787_to_fp16 = const()[name = tensor<string, []>("op_1787_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_167_cast_fp16 = mul(x = var_1786_cast_fp16, y = var_1787_to_fp16)[name = tensor<string, []>("aw_chunk_167_cast_fp16")];
+            tensor<string, []> var_1790_equation_0 = const()[name = tensor<string, []>("op_1790_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1790_cast_fp16 = einsum(equation = var_1790_equation_0, values = (var_1588_cast_fp16, var_1470_cast_fp16))[name = tensor<string, []>("op_1790_cast_fp16")];
+            tensor<fp16, []> var_1791_to_fp16 = const()[name = tensor<string, []>("op_1791_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_169_cast_fp16 = mul(x = var_1790_cast_fp16, y = var_1791_to_fp16)[name = tensor<string, []>("aw_chunk_169_cast_fp16")];
+            tensor<string, []> var_1794_equation_0 = const()[name = tensor<string, []>("op_1794_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1794_cast_fp16 = einsum(equation = var_1794_equation_0, values = (var_1588_cast_fp16, var_1477_cast_fp16))[name = tensor<string, []>("op_1794_cast_fp16")];
+            tensor<fp16, []> var_1795_to_fp16 = const()[name = tensor<string, []>("op_1795_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_171_cast_fp16 = mul(x = var_1794_cast_fp16, y = var_1795_to_fp16)[name = tensor<string, []>("aw_chunk_171_cast_fp16")];
+            tensor<string, []> var_1798_equation_0 = const()[name = tensor<string, []>("op_1798_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1798_cast_fp16 = einsum(equation = var_1798_equation_0, values = (var_1588_cast_fp16, var_1484_cast_fp16))[name = tensor<string, []>("op_1798_cast_fp16")];
+            tensor<fp16, []> var_1799_to_fp16 = const()[name = tensor<string, []>("op_1799_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_173_cast_fp16 = mul(x = var_1798_cast_fp16, y = var_1799_to_fp16)[name = tensor<string, []>("aw_chunk_173_cast_fp16")];
+            tensor<string, []> var_1802_equation_0 = const()[name = tensor<string, []>("op_1802_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1802_cast_fp16 = einsum(equation = var_1802_equation_0, values = (var_1588_cast_fp16, var_1491_cast_fp16))[name = tensor<string, []>("op_1802_cast_fp16")];
+            tensor<fp16, []> var_1803_to_fp16 = const()[name = tensor<string, []>("op_1803_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_175_cast_fp16 = mul(x = var_1802_cast_fp16, y = var_1803_to_fp16)[name = tensor<string, []>("aw_chunk_175_cast_fp16")];
+            tensor<string, []> var_1806_equation_0 = const()[name = tensor<string, []>("op_1806_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1806_cast_fp16 = einsum(equation = var_1806_equation_0, values = (var_1592_cast_fp16, var_1498_cast_fp16))[name = tensor<string, []>("op_1806_cast_fp16")];
+            tensor<fp16, []> var_1807_to_fp16 = const()[name = tensor<string, []>("op_1807_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_177_cast_fp16 = mul(x = var_1806_cast_fp16, y = var_1807_to_fp16)[name = tensor<string, []>("aw_chunk_177_cast_fp16")];
+            tensor<string, []> var_1810_equation_0 = const()[name = tensor<string, []>("op_1810_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1810_cast_fp16 = einsum(equation = var_1810_equation_0, values = (var_1592_cast_fp16, var_1505_cast_fp16))[name = tensor<string, []>("op_1810_cast_fp16")];
+            tensor<fp16, []> var_1811_to_fp16 = const()[name = tensor<string, []>("op_1811_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_179_cast_fp16 = mul(x = var_1810_cast_fp16, y = var_1811_to_fp16)[name = tensor<string, []>("aw_chunk_179_cast_fp16")];
+            tensor<string, []> var_1814_equation_0 = const()[name = tensor<string, []>("op_1814_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1814_cast_fp16 = einsum(equation = var_1814_equation_0, values = (var_1592_cast_fp16, var_1512_cast_fp16))[name = tensor<string, []>("op_1814_cast_fp16")];
+            tensor<fp16, []> var_1815_to_fp16 = const()[name = tensor<string, []>("op_1815_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_181_cast_fp16 = mul(x = var_1814_cast_fp16, y = var_1815_to_fp16)[name = tensor<string, []>("aw_chunk_181_cast_fp16")];
+            tensor<string, []> var_1818_equation_0 = const()[name = tensor<string, []>("op_1818_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1818_cast_fp16 = einsum(equation = var_1818_equation_0, values = (var_1592_cast_fp16, var_1519_cast_fp16))[name = tensor<string, []>("op_1818_cast_fp16")];
+            tensor<fp16, []> var_1819_to_fp16 = const()[name = tensor<string, []>("op_1819_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_183_cast_fp16 = mul(x = var_1818_cast_fp16, y = var_1819_to_fp16)[name = tensor<string, []>("aw_chunk_183_cast_fp16")];
+            tensor<string, []> var_1822_equation_0 = const()[name = tensor<string, []>("op_1822_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1822_cast_fp16 = einsum(equation = var_1822_equation_0, values = (var_1596_cast_fp16, var_1526_cast_fp16))[name = tensor<string, []>("op_1822_cast_fp16")];
+            tensor<fp16, []> var_1823_to_fp16 = const()[name = tensor<string, []>("op_1823_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_185_cast_fp16 = mul(x = var_1822_cast_fp16, y = var_1823_to_fp16)[name = tensor<string, []>("aw_chunk_185_cast_fp16")];
+            tensor<string, []> var_1826_equation_0 = const()[name = tensor<string, []>("op_1826_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1826_cast_fp16 = einsum(equation = var_1826_equation_0, values = (var_1596_cast_fp16, var_1533_cast_fp16))[name = tensor<string, []>("op_1826_cast_fp16")];
+            tensor<fp16, []> var_1827_to_fp16 = const()[name = tensor<string, []>("op_1827_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_187_cast_fp16 = mul(x = var_1826_cast_fp16, y = var_1827_to_fp16)[name = tensor<string, []>("aw_chunk_187_cast_fp16")];
+            tensor<string, []> var_1830_equation_0 = const()[name = tensor<string, []>("op_1830_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1830_cast_fp16 = einsum(equation = var_1830_equation_0, values = (var_1596_cast_fp16, var_1540_cast_fp16))[name = tensor<string, []>("op_1830_cast_fp16")];
+            tensor<fp16, []> var_1831_to_fp16 = const()[name = tensor<string, []>("op_1831_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_189_cast_fp16 = mul(x = var_1830_cast_fp16, y = var_1831_to_fp16)[name = tensor<string, []>("aw_chunk_189_cast_fp16")];
+            tensor<string, []> var_1834_equation_0 = const()[name = tensor<string, []>("op_1834_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1834_cast_fp16 = einsum(equation = var_1834_equation_0, values = (var_1596_cast_fp16, var_1547_cast_fp16))[name = tensor<string, []>("op_1834_cast_fp16")];
+            tensor<fp16, []> var_1835_to_fp16 = const()[name = tensor<string, []>("op_1835_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_191_cast_fp16 = mul(x = var_1834_cast_fp16, y = var_1835_to_fp16)[name = tensor<string, []>("aw_chunk_191_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1837_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_97_cast_fp16)[name = tensor<string, []>("op_1837_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1838_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_99_cast_fp16)[name = tensor<string, []>("op_1838_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1839_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_101_cast_fp16)[name = tensor<string, []>("op_1839_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1840_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_103_cast_fp16)[name = tensor<string, []>("op_1840_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1841_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_105_cast_fp16)[name = tensor<string, []>("op_1841_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1842_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_107_cast_fp16)[name = tensor<string, []>("op_1842_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1843_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_109_cast_fp16)[name = tensor<string, []>("op_1843_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1844_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_111_cast_fp16)[name = tensor<string, []>("op_1844_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1845_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_113_cast_fp16)[name = tensor<string, []>("op_1845_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1846_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_115_cast_fp16)[name = tensor<string, []>("op_1846_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1847_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_117_cast_fp16)[name = tensor<string, []>("op_1847_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1848_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_119_cast_fp16)[name = tensor<string, []>("op_1848_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1849_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_121_cast_fp16)[name = tensor<string, []>("op_1849_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1850_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_123_cast_fp16)[name = tensor<string, []>("op_1850_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1851_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_125_cast_fp16)[name = tensor<string, []>("op_1851_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1852_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_127_cast_fp16)[name = tensor<string, []>("op_1852_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1853_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_129_cast_fp16)[name = tensor<string, []>("op_1853_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1854_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_131_cast_fp16)[name = tensor<string, []>("op_1854_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1855_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_133_cast_fp16)[name = tensor<string, []>("op_1855_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1856_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_135_cast_fp16)[name = tensor<string, []>("op_1856_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1857_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_137_cast_fp16)[name = tensor<string, []>("op_1857_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1858_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_139_cast_fp16)[name = tensor<string, []>("op_1858_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1859_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_141_cast_fp16)[name = tensor<string, []>("op_1859_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1860_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_143_cast_fp16)[name = tensor<string, []>("op_1860_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1861_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_145_cast_fp16)[name = tensor<string, []>("op_1861_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1862_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_147_cast_fp16)[name = tensor<string, []>("op_1862_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1863_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_149_cast_fp16)[name = tensor<string, []>("op_1863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1864_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_151_cast_fp16)[name = tensor<string, []>("op_1864_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1865_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_153_cast_fp16)[name = tensor<string, []>("op_1865_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1866_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_155_cast_fp16)[name = tensor<string, []>("op_1866_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1867_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_157_cast_fp16)[name = tensor<string, []>("op_1867_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1868_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_159_cast_fp16)[name = tensor<string, []>("op_1868_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1869_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_161_cast_fp16)[name = tensor<string, []>("op_1869_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1870_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_163_cast_fp16)[name = tensor<string, []>("op_1870_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1871_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_165_cast_fp16)[name = tensor<string, []>("op_1871_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1872_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_167_cast_fp16)[name = tensor<string, []>("op_1872_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1873_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_169_cast_fp16)[name = tensor<string, []>("op_1873_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1874_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_171_cast_fp16)[name = tensor<string, []>("op_1874_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1875_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_173_cast_fp16)[name = tensor<string, []>("op_1875_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1876_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_175_cast_fp16)[name = tensor<string, []>("op_1876_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1877_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_177_cast_fp16)[name = tensor<string, []>("op_1877_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1878_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_179_cast_fp16)[name = tensor<string, []>("op_1878_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1879_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_181_cast_fp16)[name = tensor<string, []>("op_1879_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1880_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_183_cast_fp16)[name = tensor<string, []>("op_1880_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1881_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_185_cast_fp16)[name = tensor<string, []>("op_1881_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1882_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_187_cast_fp16)[name = tensor<string, []>("op_1882_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1883_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_189_cast_fp16)[name = tensor<string, []>("op_1883_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1884_cast_fp16 = softmax(axis = var_1110, x = aw_chunk_191_cast_fp16)[name = tensor<string, []>("op_1884_cast_fp16")];
+            tensor<string, []> var_1886_equation_0 = const()[name = tensor<string, []>("op_1886_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1886_cast_fp16 = einsum(equation = var_1886_equation_0, values = (var_1598_cast_fp16, var_1837_cast_fp16))[name = tensor<string, []>("op_1886_cast_fp16")];
+            tensor<string, []> var_1888_equation_0 = const()[name = tensor<string, []>("op_1888_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1888_cast_fp16 = einsum(equation = var_1888_equation_0, values = (var_1598_cast_fp16, var_1838_cast_fp16))[name = tensor<string, []>("op_1888_cast_fp16")];
+            tensor<string, []> var_1890_equation_0 = const()[name = tensor<string, []>("op_1890_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1890_cast_fp16 = einsum(equation = var_1890_equation_0, values = (var_1598_cast_fp16, var_1839_cast_fp16))[name = tensor<string, []>("op_1890_cast_fp16")];
+            tensor<string, []> var_1892_equation_0 = const()[name = tensor<string, []>("op_1892_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1892_cast_fp16 = einsum(equation = var_1892_equation_0, values = (var_1598_cast_fp16, var_1840_cast_fp16))[name = tensor<string, []>("op_1892_cast_fp16")];
+            tensor<string, []> var_1894_equation_0 = const()[name = tensor<string, []>("op_1894_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1894_cast_fp16 = einsum(equation = var_1894_equation_0, values = (var_1602_cast_fp16, var_1841_cast_fp16))[name = tensor<string, []>("op_1894_cast_fp16")];
+            tensor<string, []> var_1896_equation_0 = const()[name = tensor<string, []>("op_1896_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1896_cast_fp16 = einsum(equation = var_1896_equation_0, values = (var_1602_cast_fp16, var_1842_cast_fp16))[name = tensor<string, []>("op_1896_cast_fp16")];
+            tensor<string, []> var_1898_equation_0 = const()[name = tensor<string, []>("op_1898_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1898_cast_fp16 = einsum(equation = var_1898_equation_0, values = (var_1602_cast_fp16, var_1843_cast_fp16))[name = tensor<string, []>("op_1898_cast_fp16")];
+            tensor<string, []> var_1900_equation_0 = const()[name = tensor<string, []>("op_1900_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1900_cast_fp16 = einsum(equation = var_1900_equation_0, values = (var_1602_cast_fp16, var_1844_cast_fp16))[name = tensor<string, []>("op_1900_cast_fp16")];
+            tensor<string, []> var_1902_equation_0 = const()[name = tensor<string, []>("op_1902_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1902_cast_fp16 = einsum(equation = var_1902_equation_0, values = (var_1606_cast_fp16, var_1845_cast_fp16))[name = tensor<string, []>("op_1902_cast_fp16")];
+            tensor<string, []> var_1904_equation_0 = const()[name = tensor<string, []>("op_1904_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1904_cast_fp16 = einsum(equation = var_1904_equation_0, values = (var_1606_cast_fp16, var_1846_cast_fp16))[name = tensor<string, []>("op_1904_cast_fp16")];
+            tensor<string, []> var_1906_equation_0 = const()[name = tensor<string, []>("op_1906_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1906_cast_fp16 = einsum(equation = var_1906_equation_0, values = (var_1606_cast_fp16, var_1847_cast_fp16))[name = tensor<string, []>("op_1906_cast_fp16")];
+            tensor<string, []> var_1908_equation_0 = const()[name = tensor<string, []>("op_1908_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1908_cast_fp16 = einsum(equation = var_1908_equation_0, values = (var_1606_cast_fp16, var_1848_cast_fp16))[name = tensor<string, []>("op_1908_cast_fp16")];
+            tensor<string, []> var_1910_equation_0 = const()[name = tensor<string, []>("op_1910_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1910_cast_fp16 = einsum(equation = var_1910_equation_0, values = (var_1610_cast_fp16, var_1849_cast_fp16))[name = tensor<string, []>("op_1910_cast_fp16")];
+            tensor<string, []> var_1912_equation_0 = const()[name = tensor<string, []>("op_1912_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1912_cast_fp16 = einsum(equation = var_1912_equation_0, values = (var_1610_cast_fp16, var_1850_cast_fp16))[name = tensor<string, []>("op_1912_cast_fp16")];
+            tensor<string, []> var_1914_equation_0 = const()[name = tensor<string, []>("op_1914_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1914_cast_fp16 = einsum(equation = var_1914_equation_0, values = (var_1610_cast_fp16, var_1851_cast_fp16))[name = tensor<string, []>("op_1914_cast_fp16")];
+            tensor<string, []> var_1916_equation_0 = const()[name = tensor<string, []>("op_1916_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1916_cast_fp16 = einsum(equation = var_1916_equation_0, values = (var_1610_cast_fp16, var_1852_cast_fp16))[name = tensor<string, []>("op_1916_cast_fp16")];
+            tensor<string, []> var_1918_equation_0 = const()[name = tensor<string, []>("op_1918_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1918_cast_fp16 = einsum(equation = var_1918_equation_0, values = (var_1614_cast_fp16, var_1853_cast_fp16))[name = tensor<string, []>("op_1918_cast_fp16")];
+            tensor<string, []> var_1920_equation_0 = const()[name = tensor<string, []>("op_1920_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1920_cast_fp16 = einsum(equation = var_1920_equation_0, values = (var_1614_cast_fp16, var_1854_cast_fp16))[name = tensor<string, []>("op_1920_cast_fp16")];
+            tensor<string, []> var_1922_equation_0 = const()[name = tensor<string, []>("op_1922_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1922_cast_fp16 = einsum(equation = var_1922_equation_0, values = (var_1614_cast_fp16, var_1855_cast_fp16))[name = tensor<string, []>("op_1922_cast_fp16")];
+            tensor<string, []> var_1924_equation_0 = const()[name = tensor<string, []>("op_1924_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1924_cast_fp16 = einsum(equation = var_1924_equation_0, values = (var_1614_cast_fp16, var_1856_cast_fp16))[name = tensor<string, []>("op_1924_cast_fp16")];
+            tensor<string, []> var_1926_equation_0 = const()[name = tensor<string, []>("op_1926_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1926_cast_fp16 = einsum(equation = var_1926_equation_0, values = (var_1618_cast_fp16, var_1857_cast_fp16))[name = tensor<string, []>("op_1926_cast_fp16")];
+            tensor<string, []> var_1928_equation_0 = const()[name = tensor<string, []>("op_1928_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1928_cast_fp16 = einsum(equation = var_1928_equation_0, values = (var_1618_cast_fp16, var_1858_cast_fp16))[name = tensor<string, []>("op_1928_cast_fp16")];
+            tensor<string, []> var_1930_equation_0 = const()[name = tensor<string, []>("op_1930_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1930_cast_fp16 = einsum(equation = var_1930_equation_0, values = (var_1618_cast_fp16, var_1859_cast_fp16))[name = tensor<string, []>("op_1930_cast_fp16")];
+            tensor<string, []> var_1932_equation_0 = const()[name = tensor<string, []>("op_1932_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1932_cast_fp16 = einsum(equation = var_1932_equation_0, values = (var_1618_cast_fp16, var_1860_cast_fp16))[name = tensor<string, []>("op_1932_cast_fp16")];
+            tensor<string, []> var_1934_equation_0 = const()[name = tensor<string, []>("op_1934_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1934_cast_fp16 = einsum(equation = var_1934_equation_0, values = (var_1622_cast_fp16, var_1861_cast_fp16))[name = tensor<string, []>("op_1934_cast_fp16")];
+            tensor<string, []> var_1936_equation_0 = const()[name = tensor<string, []>("op_1936_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1936_cast_fp16 = einsum(equation = var_1936_equation_0, values = (var_1622_cast_fp16, var_1862_cast_fp16))[name = tensor<string, []>("op_1936_cast_fp16")];
+            tensor<string, []> var_1938_equation_0 = const()[name = tensor<string, []>("op_1938_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1938_cast_fp16 = einsum(equation = var_1938_equation_0, values = (var_1622_cast_fp16, var_1863_cast_fp16))[name = tensor<string, []>("op_1938_cast_fp16")];
+            tensor<string, []> var_1940_equation_0 = const()[name = tensor<string, []>("op_1940_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1940_cast_fp16 = einsum(equation = var_1940_equation_0, values = (var_1622_cast_fp16, var_1864_cast_fp16))[name = tensor<string, []>("op_1940_cast_fp16")];
+            tensor<string, []> var_1942_equation_0 = const()[name = tensor<string, []>("op_1942_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1942_cast_fp16 = einsum(equation = var_1942_equation_0, values = (var_1626_cast_fp16, var_1865_cast_fp16))[name = tensor<string, []>("op_1942_cast_fp16")];
+            tensor<string, []> var_1944_equation_0 = const()[name = tensor<string, []>("op_1944_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1944_cast_fp16 = einsum(equation = var_1944_equation_0, values = (var_1626_cast_fp16, var_1866_cast_fp16))[name = tensor<string, []>("op_1944_cast_fp16")];
+            tensor<string, []> var_1946_equation_0 = const()[name = tensor<string, []>("op_1946_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1946_cast_fp16 = einsum(equation = var_1946_equation_0, values = (var_1626_cast_fp16, var_1867_cast_fp16))[name = tensor<string, []>("op_1946_cast_fp16")];
+            tensor<string, []> var_1948_equation_0 = const()[name = tensor<string, []>("op_1948_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1948_cast_fp16 = einsum(equation = var_1948_equation_0, values = (var_1626_cast_fp16, var_1868_cast_fp16))[name = tensor<string, []>("op_1948_cast_fp16")];
+            tensor<string, []> var_1950_equation_0 = const()[name = tensor<string, []>("op_1950_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1950_cast_fp16 = einsum(equation = var_1950_equation_0, values = (var_1630_cast_fp16, var_1869_cast_fp16))[name = tensor<string, []>("op_1950_cast_fp16")];
+            tensor<string, []> var_1952_equation_0 = const()[name = tensor<string, []>("op_1952_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1952_cast_fp16 = einsum(equation = var_1952_equation_0, values = (var_1630_cast_fp16, var_1870_cast_fp16))[name = tensor<string, []>("op_1952_cast_fp16")];
+            tensor<string, []> var_1954_equation_0 = const()[name = tensor<string, []>("op_1954_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1954_cast_fp16 = einsum(equation = var_1954_equation_0, values = (var_1630_cast_fp16, var_1871_cast_fp16))[name = tensor<string, []>("op_1954_cast_fp16")];
+            tensor<string, []> var_1956_equation_0 = const()[name = tensor<string, []>("op_1956_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1956_cast_fp16 = einsum(equation = var_1956_equation_0, values = (var_1630_cast_fp16, var_1872_cast_fp16))[name = tensor<string, []>("op_1956_cast_fp16")];
+            tensor<string, []> var_1958_equation_0 = const()[name = tensor<string, []>("op_1958_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1958_cast_fp16 = einsum(equation = var_1958_equation_0, values = (var_1634_cast_fp16, var_1873_cast_fp16))[name = tensor<string, []>("op_1958_cast_fp16")];
+            tensor<string, []> var_1960_equation_0 = const()[name = tensor<string, []>("op_1960_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1960_cast_fp16 = einsum(equation = var_1960_equation_0, values = (var_1634_cast_fp16, var_1874_cast_fp16))[name = tensor<string, []>("op_1960_cast_fp16")];
+            tensor<string, []> var_1962_equation_0 = const()[name = tensor<string, []>("op_1962_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1962_cast_fp16 = einsum(equation = var_1962_equation_0, values = (var_1634_cast_fp16, var_1875_cast_fp16))[name = tensor<string, []>("op_1962_cast_fp16")];
+            tensor<string, []> var_1964_equation_0 = const()[name = tensor<string, []>("op_1964_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1964_cast_fp16 = einsum(equation = var_1964_equation_0, values = (var_1634_cast_fp16, var_1876_cast_fp16))[name = tensor<string, []>("op_1964_cast_fp16")];
+            tensor<string, []> var_1966_equation_0 = const()[name = tensor<string, []>("op_1966_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1966_cast_fp16 = einsum(equation = var_1966_equation_0, values = (var_1638_cast_fp16, var_1877_cast_fp16))[name = tensor<string, []>("op_1966_cast_fp16")];
+            tensor<string, []> var_1968_equation_0 = const()[name = tensor<string, []>("op_1968_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1968_cast_fp16 = einsum(equation = var_1968_equation_0, values = (var_1638_cast_fp16, var_1878_cast_fp16))[name = tensor<string, []>("op_1968_cast_fp16")];
+            tensor<string, []> var_1970_equation_0 = const()[name = tensor<string, []>("op_1970_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1970_cast_fp16 = einsum(equation = var_1970_equation_0, values = (var_1638_cast_fp16, var_1879_cast_fp16))[name = tensor<string, []>("op_1970_cast_fp16")];
+            tensor<string, []> var_1972_equation_0 = const()[name = tensor<string, []>("op_1972_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1972_cast_fp16 = einsum(equation = var_1972_equation_0, values = (var_1638_cast_fp16, var_1880_cast_fp16))[name = tensor<string, []>("op_1972_cast_fp16")];
+            tensor<string, []> var_1974_equation_0 = const()[name = tensor<string, []>("op_1974_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1974_cast_fp16 = einsum(equation = var_1974_equation_0, values = (var_1642_cast_fp16, var_1881_cast_fp16))[name = tensor<string, []>("op_1974_cast_fp16")];
+            tensor<string, []> var_1976_equation_0 = const()[name = tensor<string, []>("op_1976_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1976_cast_fp16 = einsum(equation = var_1976_equation_0, values = (var_1642_cast_fp16, var_1882_cast_fp16))[name = tensor<string, []>("op_1976_cast_fp16")];
+            tensor<string, []> var_1978_equation_0 = const()[name = tensor<string, []>("op_1978_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1978_cast_fp16 = einsum(equation = var_1978_equation_0, values = (var_1642_cast_fp16, var_1883_cast_fp16))[name = tensor<string, []>("op_1978_cast_fp16")];
+            tensor<string, []> var_1980_equation_0 = const()[name = tensor<string, []>("op_1980_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1980_cast_fp16 = einsum(equation = var_1980_equation_0, values = (var_1642_cast_fp16, var_1884_cast_fp16))[name = tensor<string, []>("op_1980_cast_fp16")];
+            tensor<bool, []> var_1982_interleave_0 = const()[name = tensor<string, []>("op_1982_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1982_cast_fp16 = concat(axis = var_1093, interleave = var_1982_interleave_0, values = (var_1886_cast_fp16, var_1888_cast_fp16, var_1890_cast_fp16, var_1892_cast_fp16))[name = tensor<string, []>("op_1982_cast_fp16")];
+            tensor<bool, []> var_1984_interleave_0 = const()[name = tensor<string, []>("op_1984_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1984_cast_fp16 = concat(axis = var_1093, interleave = var_1984_interleave_0, values = (var_1894_cast_fp16, var_1896_cast_fp16, var_1898_cast_fp16, var_1900_cast_fp16))[name = tensor<string, []>("op_1984_cast_fp16")];
+            tensor<bool, []> var_1986_interleave_0 = const()[name = tensor<string, []>("op_1986_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1986_cast_fp16 = concat(axis = var_1093, interleave = var_1986_interleave_0, values = (var_1902_cast_fp16, var_1904_cast_fp16, var_1906_cast_fp16, var_1908_cast_fp16))[name = tensor<string, []>("op_1986_cast_fp16")];
+            tensor<bool, []> var_1988_interleave_0 = const()[name = tensor<string, []>("op_1988_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1988_cast_fp16 = concat(axis = var_1093, interleave = var_1988_interleave_0, values = (var_1910_cast_fp16, var_1912_cast_fp16, var_1914_cast_fp16, var_1916_cast_fp16))[name = tensor<string, []>("op_1988_cast_fp16")];
+            tensor<bool, []> var_1990_interleave_0 = const()[name = tensor<string, []>("op_1990_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1990_cast_fp16 = concat(axis = var_1093, interleave = var_1990_interleave_0, values = (var_1918_cast_fp16, var_1920_cast_fp16, var_1922_cast_fp16, var_1924_cast_fp16))[name = tensor<string, []>("op_1990_cast_fp16")];
+            tensor<bool, []> var_1992_interleave_0 = const()[name = tensor<string, []>("op_1992_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1992_cast_fp16 = concat(axis = var_1093, interleave = var_1992_interleave_0, values = (var_1926_cast_fp16, var_1928_cast_fp16, var_1930_cast_fp16, var_1932_cast_fp16))[name = tensor<string, []>("op_1992_cast_fp16")];
+            tensor<bool, []> var_1994_interleave_0 = const()[name = tensor<string, []>("op_1994_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1994_cast_fp16 = concat(axis = var_1093, interleave = var_1994_interleave_0, values = (var_1934_cast_fp16, var_1936_cast_fp16, var_1938_cast_fp16, var_1940_cast_fp16))[name = tensor<string, []>("op_1994_cast_fp16")];
+            tensor<bool, []> var_1996_interleave_0 = const()[name = tensor<string, []>("op_1996_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1996_cast_fp16 = concat(axis = var_1093, interleave = var_1996_interleave_0, values = (var_1942_cast_fp16, var_1944_cast_fp16, var_1946_cast_fp16, var_1948_cast_fp16))[name = tensor<string, []>("op_1996_cast_fp16")];
+            tensor<bool, []> var_1998_interleave_0 = const()[name = tensor<string, []>("op_1998_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1998_cast_fp16 = concat(axis = var_1093, interleave = var_1998_interleave_0, values = (var_1950_cast_fp16, var_1952_cast_fp16, var_1954_cast_fp16, var_1956_cast_fp16))[name = tensor<string, []>("op_1998_cast_fp16")];
+            tensor<bool, []> var_2000_interleave_0 = const()[name = tensor<string, []>("op_2000_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2000_cast_fp16 = concat(axis = var_1093, interleave = var_2000_interleave_0, values = (var_1958_cast_fp16, var_1960_cast_fp16, var_1962_cast_fp16, var_1964_cast_fp16))[name = tensor<string, []>("op_2000_cast_fp16")];
+            tensor<bool, []> var_2002_interleave_0 = const()[name = tensor<string, []>("op_2002_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2002_cast_fp16 = concat(axis = var_1093, interleave = var_2002_interleave_0, values = (var_1966_cast_fp16, var_1968_cast_fp16, var_1970_cast_fp16, var_1972_cast_fp16))[name = tensor<string, []>("op_2002_cast_fp16")];
+            tensor<bool, []> var_2004_interleave_0 = const()[name = tensor<string, []>("op_2004_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2004_cast_fp16 = concat(axis = var_1093, interleave = var_2004_interleave_0, values = (var_1974_cast_fp16, var_1976_cast_fp16, var_1978_cast_fp16, var_1980_cast_fp16))[name = tensor<string, []>("op_2004_cast_fp16")];
+            tensor<bool, []> input_9_interleave_0 = const()[name = tensor<string, []>("input_9_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_9_cast_fp16 = concat(axis = var_1110, interleave = input_9_interleave_0, values = (var_1982_cast_fp16, var_1984_cast_fp16, var_1986_cast_fp16, var_1988_cast_fp16, var_1990_cast_fp16, var_1992_cast_fp16, var_1994_cast_fp16, var_1996_cast_fp16, var_1998_cast_fp16, var_2000_cast_fp16, var_2002_cast_fp16, var_2004_cast_fp16))[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<int32, [2]> var_2009 = const()[name = tensor<string, []>("op_2009"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2011 = const()[name = tensor<string, []>("op_2011"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_7_pad_type_0 = const()[name = tensor<string, []>("obj_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_7_pad_0 = const()[name = tensor<string, []>("obj_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23938944)))];
+            tensor<fp16, [768]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25118656)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_2011, groups = var_1110, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_2009, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("obj_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> var_2017 = const()[name = tensor<string, []>("op_2017"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_7_cast_fp16 = reduce_mean(axes = var_2017, keep_dims = var_1111, x = inputs_7_cast_fp16)[name = tensor<string, []>("channels_mean_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor<string, []>("zero_mean_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor<string, []>("zero_mean_sq_7_cast_fp16")];
+            tensor<int32, [1]> var_2021 = const()[name = tensor<string, []>("op_2021"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2022_cast_fp16 = reduce_mean(axes = var_2021, keep_dims = var_1111, x = zero_mean_sq_7_cast_fp16)[name = tensor<string, []>("op_2022_cast_fp16")];
+            tensor<fp16, []> var_2023_to_fp16 = const()[name = tensor<string, []>("op_2023_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_2024_cast_fp16 = add(x = var_2022_cast_fp16, y = var_2023_to_fp16)[name = tensor<string, []>("op_2024_cast_fp16")];
+            tensor<fp16, []> denom_7_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_7_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_2024_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
+            tensor<fp16, [768]> input_11_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_11_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25120256)))];
+            tensor<fp16, [768]> input_11_beta_0_to_fp16 = const()[name = tensor<string, []>("input_11_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25121856)))];
+            tensor<fp16, []> input_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<int32, [2]> var_2035 = const()[name = tensor<string, []>("op_2035"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2037 = const()[name = tensor<string, []>("op_2037"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_13_pad_type_0 = const()[name = tensor<string, []>("input_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_13_pad_0 = const()[name = tensor<string, []>("input_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25123456)))];
+            tensor<fp16, [3072]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29842112)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_2037, groups = var_1110, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = var_2035, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> input_15_mode_0 = const()[name = tensor<string, []>("input_15_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<int32, [2]> var_2043 = const()[name = tensor<string, []>("op_2043"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2045 = const()[name = tensor<string, []>("op_2045"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_7_pad_type_0 = const()[name = tensor<string, []>("hidden_states_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = tensor<string, []>("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29848320)))];
+            tensor<fp16, [768]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34566976)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_2045, groups = var_1110, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_2043, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_2052 = const()[name = tensor<string, []>("op_2052"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_2069 = const()[name = tensor<string, []>("op_2069"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_2070 = const()[name = tensor<string, []>("op_2070"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_2080 = const()[name = tensor<string, []>("op_2080"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_9_cast_fp16 = reduce_mean(axes = var_2080, keep_dims = var_2070, x = inputs_9_cast_fp16)[name = tensor<string, []>("channels_mean_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_sq_9_cast_fp16")];
+            tensor<int32, [1]> var_2084 = const()[name = tensor<string, []>("op_2084"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2085_cast_fp16 = reduce_mean(axes = var_2084, keep_dims = var_2070, x = zero_mean_sq_9_cast_fp16)[name = tensor<string, []>("op_2085_cast_fp16")];
+            tensor<fp16, []> var_2086_to_fp16 = const()[name = tensor<string, []>("op_2086_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_2087_cast_fp16 = add(x = var_2085_cast_fp16, y = var_2086_to_fp16)[name = tensor<string, []>("op_2087_cast_fp16")];
+            tensor<fp16, []> denom_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_2087_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
+            tensor<fp16, [768]> obj_9_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34568576)))];
+            tensor<fp16, [768]> obj_9_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_9_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34570176)))];
+            tensor<fp16, []> obj_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_9_cast_fp16")];
+            tensor<int32, [2]> var_2102 = const()[name = tensor<string, []>("op_2102"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2104 = const()[name = tensor<string, []>("op_2104"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34571776)))];
+            tensor<fp16, [768]> layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35751488)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_2104, groups = var_2069, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_2102, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
+            tensor<int32, [2]> var_2108 = const()[name = tensor<string, []>("op_2108"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2110 = const()[name = tensor<string, []>("op_2110"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_5_pad_type_0 = const()[name = tensor<string, []>("key_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_5_pad_0 = const()[name = tensor<string, []>("key_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35753088)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_5_cast_fp16 = conv(dilations = var_2110, groups = var_2069, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = var_2108, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
+            tensor<int32, [2]> var_2115 = const()[name = tensor<string, []>("op_2115"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2117 = const()[name = tensor<string, []>("op_2117"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_5_pad_type_0 = const()[name = tensor<string, []>("value_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_5_pad_0 = const()[name = tensor<string, []>("value_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36932800)))];
+            tensor<fp16, [768]> layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38112512)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_2117, groups = var_2069, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = var_2115, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("value_5_cast_fp16")];
+            tensor<int32, [4]> var_2124_begin_0 = const()[name = tensor<string, []>("op_2124_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2124_end_0 = const()[name = tensor<string, []>("op_2124_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2124_end_mask_0 = const()[name = tensor<string, []>("op_2124_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2124_cast_fp16 = slice_by_index(begin = var_2124_begin_0, end = var_2124_end_0, end_mask = var_2124_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2124_cast_fp16")];
+            tensor<int32, [4]> var_2128_begin_0 = const()[name = tensor<string, []>("op_2128_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_2128_end_0 = const()[name = tensor<string, []>("op_2128_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_2128_end_mask_0 = const()[name = tensor<string, []>("op_2128_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2128_cast_fp16 = slice_by_index(begin = var_2128_begin_0, end = var_2128_end_0, end_mask = var_2128_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2128_cast_fp16")];
+            tensor<int32, [4]> var_2132_begin_0 = const()[name = tensor<string, []>("op_2132_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_2132_end_0 = const()[name = tensor<string, []>("op_2132_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_2132_end_mask_0 = const()[name = tensor<string, []>("op_2132_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2132_cast_fp16 = slice_by_index(begin = var_2132_begin_0, end = var_2132_end_0, end_mask = var_2132_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2132_cast_fp16")];
+            tensor<int32, [4]> var_2136_begin_0 = const()[name = tensor<string, []>("op_2136_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_2136_end_0 = const()[name = tensor<string, []>("op_2136_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_2136_end_mask_0 = const()[name = tensor<string, []>("op_2136_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2136_cast_fp16 = slice_by_index(begin = var_2136_begin_0, end = var_2136_end_0, end_mask = var_2136_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2136_cast_fp16")];
+            tensor<int32, [4]> var_2140_begin_0 = const()[name = tensor<string, []>("op_2140_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_2140_end_0 = const()[name = tensor<string, []>("op_2140_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_2140_end_mask_0 = const()[name = tensor<string, []>("op_2140_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2140_cast_fp16 = slice_by_index(begin = var_2140_begin_0, end = var_2140_end_0, end_mask = var_2140_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2140_cast_fp16")];
+            tensor<int32, [4]> var_2144_begin_0 = const()[name = tensor<string, []>("op_2144_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_2144_end_0 = const()[name = tensor<string, []>("op_2144_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_2144_end_mask_0 = const()[name = tensor<string, []>("op_2144_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2144_cast_fp16 = slice_by_index(begin = var_2144_begin_0, end = var_2144_end_0, end_mask = var_2144_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2144_cast_fp16")];
+            tensor<int32, [4]> var_2148_begin_0 = const()[name = tensor<string, []>("op_2148_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_2148_end_0 = const()[name = tensor<string, []>("op_2148_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_2148_end_mask_0 = const()[name = tensor<string, []>("op_2148_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2148_cast_fp16 = slice_by_index(begin = var_2148_begin_0, end = var_2148_end_0, end_mask = var_2148_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2148_cast_fp16")];
+            tensor<int32, [4]> var_2152_begin_0 = const()[name = tensor<string, []>("op_2152_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_2152_end_0 = const()[name = tensor<string, []>("op_2152_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_2152_end_mask_0 = const()[name = tensor<string, []>("op_2152_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2152_cast_fp16 = slice_by_index(begin = var_2152_begin_0, end = var_2152_end_0, end_mask = var_2152_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2152_cast_fp16")];
+            tensor<int32, [4]> var_2156_begin_0 = const()[name = tensor<string, []>("op_2156_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_2156_end_0 = const()[name = tensor<string, []>("op_2156_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_2156_end_mask_0 = const()[name = tensor<string, []>("op_2156_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2156_cast_fp16 = slice_by_index(begin = var_2156_begin_0, end = var_2156_end_0, end_mask = var_2156_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2156_cast_fp16")];
+            tensor<int32, [4]> var_2160_begin_0 = const()[name = tensor<string, []>("op_2160_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_2160_end_0 = const()[name = tensor<string, []>("op_2160_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_2160_end_mask_0 = const()[name = tensor<string, []>("op_2160_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2160_cast_fp16 = slice_by_index(begin = var_2160_begin_0, end = var_2160_end_0, end_mask = var_2160_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2160_cast_fp16")];
+            tensor<int32, [4]> var_2164_begin_0 = const()[name = tensor<string, []>("op_2164_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_2164_end_0 = const()[name = tensor<string, []>("op_2164_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_2164_end_mask_0 = const()[name = tensor<string, []>("op_2164_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2164_cast_fp16")];
+            tensor<int32, [4]> var_2168_begin_0 = const()[name = tensor<string, []>("op_2168_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_2168_end_0 = const()[name = tensor<string, []>("op_2168_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_2168_end_mask_0 = const()[name = tensor<string, []>("op_2168_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_2168_cast_fp16")];
+            tensor<int32, [4]> var_2177_begin_0 = const()[name = tensor<string, []>("op_2177_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2177_end_0 = const()[name = tensor<string, []>("op_2177_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2177_end_mask_0 = const()[name = tensor<string, []>("op_2177_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2177_cast_fp16 = slice_by_index(begin = var_2177_begin_0, end = var_2177_end_0, end_mask = var_2177_end_mask_0, x = var_2124_cast_fp16)[name = tensor<string, []>("op_2177_cast_fp16")];
+            tensor<int32, [4]> var_2184_begin_0 = const()[name = tensor<string, []>("op_2184_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2184_end_0 = const()[name = tensor<string, []>("op_2184_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2184_end_mask_0 = const()[name = tensor<string, []>("op_2184_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = var_2124_cast_fp16)[name = tensor<string, []>("op_2184_cast_fp16")];
+            tensor<int32, [4]> var_2191_begin_0 = const()[name = tensor<string, []>("op_2191_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2191_end_0 = const()[name = tensor<string, []>("op_2191_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2191_end_mask_0 = const()[name = tensor<string, []>("op_2191_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2191_cast_fp16 = slice_by_index(begin = var_2191_begin_0, end = var_2191_end_0, end_mask = var_2191_end_mask_0, x = var_2124_cast_fp16)[name = tensor<string, []>("op_2191_cast_fp16")];
+            tensor<int32, [4]> var_2198_begin_0 = const()[name = tensor<string, []>("op_2198_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2198_end_0 = const()[name = tensor<string, []>("op_2198_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2198_end_mask_0 = const()[name = tensor<string, []>("op_2198_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2198_cast_fp16 = slice_by_index(begin = var_2198_begin_0, end = var_2198_end_0, end_mask = var_2198_end_mask_0, x = var_2124_cast_fp16)[name = tensor<string, []>("op_2198_cast_fp16")];
+            tensor<int32, [4]> var_2205_begin_0 = const()[name = tensor<string, []>("op_2205_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2205_end_0 = const()[name = tensor<string, []>("op_2205_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2205_end_mask_0 = const()[name = tensor<string, []>("op_2205_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2205_cast_fp16 = slice_by_index(begin = var_2205_begin_0, end = var_2205_end_0, end_mask = var_2205_end_mask_0, x = var_2128_cast_fp16)[name = tensor<string, []>("op_2205_cast_fp16")];
+            tensor<int32, [4]> var_2212_begin_0 = const()[name = tensor<string, []>("op_2212_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2212_end_0 = const()[name = tensor<string, []>("op_2212_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2212_end_mask_0 = const()[name = tensor<string, []>("op_2212_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = var_2128_cast_fp16)[name = tensor<string, []>("op_2212_cast_fp16")];
+            tensor<int32, [4]> var_2219_begin_0 = const()[name = tensor<string, []>("op_2219_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2219_end_0 = const()[name = tensor<string, []>("op_2219_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2219_end_mask_0 = const()[name = tensor<string, []>("op_2219_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2219_cast_fp16 = slice_by_index(begin = var_2219_begin_0, end = var_2219_end_0, end_mask = var_2219_end_mask_0, x = var_2128_cast_fp16)[name = tensor<string, []>("op_2219_cast_fp16")];
+            tensor<int32, [4]> var_2226_begin_0 = const()[name = tensor<string, []>("op_2226_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2226_end_0 = const()[name = tensor<string, []>("op_2226_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2226_end_mask_0 = const()[name = tensor<string, []>("op_2226_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2226_cast_fp16 = slice_by_index(begin = var_2226_begin_0, end = var_2226_end_0, end_mask = var_2226_end_mask_0, x = var_2128_cast_fp16)[name = tensor<string, []>("op_2226_cast_fp16")];
+            tensor<int32, [4]> var_2233_begin_0 = const()[name = tensor<string, []>("op_2233_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2233_end_0 = const()[name = tensor<string, []>("op_2233_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2233_end_mask_0 = const()[name = tensor<string, []>("op_2233_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2233_cast_fp16 = slice_by_index(begin = var_2233_begin_0, end = var_2233_end_0, end_mask = var_2233_end_mask_0, x = var_2132_cast_fp16)[name = tensor<string, []>("op_2233_cast_fp16")];
+            tensor<int32, [4]> var_2240_begin_0 = const()[name = tensor<string, []>("op_2240_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2240_end_0 = const()[name = tensor<string, []>("op_2240_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2240_end_mask_0 = const()[name = tensor<string, []>("op_2240_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = var_2132_cast_fp16)[name = tensor<string, []>("op_2240_cast_fp16")];
+            tensor<int32, [4]> var_2247_begin_0 = const()[name = tensor<string, []>("op_2247_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2247_end_0 = const()[name = tensor<string, []>("op_2247_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2247_end_mask_0 = const()[name = tensor<string, []>("op_2247_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2247_cast_fp16 = slice_by_index(begin = var_2247_begin_0, end = var_2247_end_0, end_mask = var_2247_end_mask_0, x = var_2132_cast_fp16)[name = tensor<string, []>("op_2247_cast_fp16")];
+            tensor<int32, [4]> var_2254_begin_0 = const()[name = tensor<string, []>("op_2254_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2254_end_0 = const()[name = tensor<string, []>("op_2254_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2254_end_mask_0 = const()[name = tensor<string, []>("op_2254_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2254_cast_fp16 = slice_by_index(begin = var_2254_begin_0, end = var_2254_end_0, end_mask = var_2254_end_mask_0, x = var_2132_cast_fp16)[name = tensor<string, []>("op_2254_cast_fp16")];
+            tensor<int32, [4]> var_2261_begin_0 = const()[name = tensor<string, []>("op_2261_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2261_end_0 = const()[name = tensor<string, []>("op_2261_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2261_end_mask_0 = const()[name = tensor<string, []>("op_2261_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2261_cast_fp16 = slice_by_index(begin = var_2261_begin_0, end = var_2261_end_0, end_mask = var_2261_end_mask_0, x = var_2136_cast_fp16)[name = tensor<string, []>("op_2261_cast_fp16")];
+            tensor<int32, [4]> var_2268_begin_0 = const()[name = tensor<string, []>("op_2268_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2268_end_0 = const()[name = tensor<string, []>("op_2268_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2268_end_mask_0 = const()[name = tensor<string, []>("op_2268_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = var_2136_cast_fp16)[name = tensor<string, []>("op_2268_cast_fp16")];
+            tensor<int32, [4]> var_2275_begin_0 = const()[name = tensor<string, []>("op_2275_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2275_end_0 = const()[name = tensor<string, []>("op_2275_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2275_end_mask_0 = const()[name = tensor<string, []>("op_2275_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2275_cast_fp16 = slice_by_index(begin = var_2275_begin_0, end = var_2275_end_0, end_mask = var_2275_end_mask_0, x = var_2136_cast_fp16)[name = tensor<string, []>("op_2275_cast_fp16")];
+            tensor<int32, [4]> var_2282_begin_0 = const()[name = tensor<string, []>("op_2282_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2282_end_0 = const()[name = tensor<string, []>("op_2282_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2282_end_mask_0 = const()[name = tensor<string, []>("op_2282_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2282_cast_fp16 = slice_by_index(begin = var_2282_begin_0, end = var_2282_end_0, end_mask = var_2282_end_mask_0, x = var_2136_cast_fp16)[name = tensor<string, []>("op_2282_cast_fp16")];
+            tensor<int32, [4]> var_2289_begin_0 = const()[name = tensor<string, []>("op_2289_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2289_end_0 = const()[name = tensor<string, []>("op_2289_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2289_end_mask_0 = const()[name = tensor<string, []>("op_2289_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2289_cast_fp16 = slice_by_index(begin = var_2289_begin_0, end = var_2289_end_0, end_mask = var_2289_end_mask_0, x = var_2140_cast_fp16)[name = tensor<string, []>("op_2289_cast_fp16")];
+            tensor<int32, [4]> var_2296_begin_0 = const()[name = tensor<string, []>("op_2296_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2296_end_0 = const()[name = tensor<string, []>("op_2296_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2296_end_mask_0 = const()[name = tensor<string, []>("op_2296_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2296_cast_fp16 = slice_by_index(begin = var_2296_begin_0, end = var_2296_end_0, end_mask = var_2296_end_mask_0, x = var_2140_cast_fp16)[name = tensor<string, []>("op_2296_cast_fp16")];
+            tensor<int32, [4]> var_2303_begin_0 = const()[name = tensor<string, []>("op_2303_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2303_end_0 = const()[name = tensor<string, []>("op_2303_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2303_end_mask_0 = const()[name = tensor<string, []>("op_2303_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2303_cast_fp16 = slice_by_index(begin = var_2303_begin_0, end = var_2303_end_0, end_mask = var_2303_end_mask_0, x = var_2140_cast_fp16)[name = tensor<string, []>("op_2303_cast_fp16")];
+            tensor<int32, [4]> var_2310_begin_0 = const()[name = tensor<string, []>("op_2310_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2310_end_0 = const()[name = tensor<string, []>("op_2310_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2310_end_mask_0 = const()[name = tensor<string, []>("op_2310_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = var_2140_cast_fp16)[name = tensor<string, []>("op_2310_cast_fp16")];
+            tensor<int32, [4]> var_2317_begin_0 = const()[name = tensor<string, []>("op_2317_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2317_end_0 = const()[name = tensor<string, []>("op_2317_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2317_end_mask_0 = const()[name = tensor<string, []>("op_2317_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2317_cast_fp16 = slice_by_index(begin = var_2317_begin_0, end = var_2317_end_0, end_mask = var_2317_end_mask_0, x = var_2144_cast_fp16)[name = tensor<string, []>("op_2317_cast_fp16")];
+            tensor<int32, [4]> var_2324_begin_0 = const()[name = tensor<string, []>("op_2324_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2324_end_0 = const()[name = tensor<string, []>("op_2324_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2324_end_mask_0 = const()[name = tensor<string, []>("op_2324_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2324_cast_fp16 = slice_by_index(begin = var_2324_begin_0, end = var_2324_end_0, end_mask = var_2324_end_mask_0, x = var_2144_cast_fp16)[name = tensor<string, []>("op_2324_cast_fp16")];
+            tensor<int32, [4]> var_2331_begin_0 = const()[name = tensor<string, []>("op_2331_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2331_end_0 = const()[name = tensor<string, []>("op_2331_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2331_end_mask_0 = const()[name = tensor<string, []>("op_2331_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2331_cast_fp16 = slice_by_index(begin = var_2331_begin_0, end = var_2331_end_0, end_mask = var_2331_end_mask_0, x = var_2144_cast_fp16)[name = tensor<string, []>("op_2331_cast_fp16")];
+            tensor<int32, [4]> var_2338_begin_0 = const()[name = tensor<string, []>("op_2338_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2338_end_0 = const()[name = tensor<string, []>("op_2338_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2338_end_mask_0 = const()[name = tensor<string, []>("op_2338_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = var_2144_cast_fp16)[name = tensor<string, []>("op_2338_cast_fp16")];
+            tensor<int32, [4]> var_2345_begin_0 = const()[name = tensor<string, []>("op_2345_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2345_end_0 = const()[name = tensor<string, []>("op_2345_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2345_end_mask_0 = const()[name = tensor<string, []>("op_2345_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2345_cast_fp16 = slice_by_index(begin = var_2345_begin_0, end = var_2345_end_0, end_mask = var_2345_end_mask_0, x = var_2148_cast_fp16)[name = tensor<string, []>("op_2345_cast_fp16")];
+            tensor<int32, [4]> var_2352_begin_0 = const()[name = tensor<string, []>("op_2352_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2352_end_0 = const()[name = tensor<string, []>("op_2352_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2352_end_mask_0 = const()[name = tensor<string, []>("op_2352_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2352_cast_fp16 = slice_by_index(begin = var_2352_begin_0, end = var_2352_end_0, end_mask = var_2352_end_mask_0, x = var_2148_cast_fp16)[name = tensor<string, []>("op_2352_cast_fp16")];
+            tensor<int32, [4]> var_2359_begin_0 = const()[name = tensor<string, []>("op_2359_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2359_end_0 = const()[name = tensor<string, []>("op_2359_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2359_end_mask_0 = const()[name = tensor<string, []>("op_2359_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2359_cast_fp16 = slice_by_index(begin = var_2359_begin_0, end = var_2359_end_0, end_mask = var_2359_end_mask_0, x = var_2148_cast_fp16)[name = tensor<string, []>("op_2359_cast_fp16")];
+            tensor<int32, [4]> var_2366_begin_0 = const()[name = tensor<string, []>("op_2366_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2366_end_0 = const()[name = tensor<string, []>("op_2366_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2366_end_mask_0 = const()[name = tensor<string, []>("op_2366_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = var_2148_cast_fp16)[name = tensor<string, []>("op_2366_cast_fp16")];
+            tensor<int32, [4]> var_2373_begin_0 = const()[name = tensor<string, []>("op_2373_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2373_end_0 = const()[name = tensor<string, []>("op_2373_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2373_end_mask_0 = const()[name = tensor<string, []>("op_2373_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2373_cast_fp16 = slice_by_index(begin = var_2373_begin_0, end = var_2373_end_0, end_mask = var_2373_end_mask_0, x = var_2152_cast_fp16)[name = tensor<string, []>("op_2373_cast_fp16")];
+            tensor<int32, [4]> var_2380_begin_0 = const()[name = tensor<string, []>("op_2380_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2380_end_0 = const()[name = tensor<string, []>("op_2380_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2380_end_mask_0 = const()[name = tensor<string, []>("op_2380_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2380_cast_fp16 = slice_by_index(begin = var_2380_begin_0, end = var_2380_end_0, end_mask = var_2380_end_mask_0, x = var_2152_cast_fp16)[name = tensor<string, []>("op_2380_cast_fp16")];
+            tensor<int32, [4]> var_2387_begin_0 = const()[name = tensor<string, []>("op_2387_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2387_end_0 = const()[name = tensor<string, []>("op_2387_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2387_end_mask_0 = const()[name = tensor<string, []>("op_2387_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2387_cast_fp16 = slice_by_index(begin = var_2387_begin_0, end = var_2387_end_0, end_mask = var_2387_end_mask_0, x = var_2152_cast_fp16)[name = tensor<string, []>("op_2387_cast_fp16")];
+            tensor<int32, [4]> var_2394_begin_0 = const()[name = tensor<string, []>("op_2394_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2394_end_0 = const()[name = tensor<string, []>("op_2394_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2394_end_mask_0 = const()[name = tensor<string, []>("op_2394_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = var_2152_cast_fp16)[name = tensor<string, []>("op_2394_cast_fp16")];
+            tensor<int32, [4]> var_2401_begin_0 = const()[name = tensor<string, []>("op_2401_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2401_end_0 = const()[name = tensor<string, []>("op_2401_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2401_end_mask_0 = const()[name = tensor<string, []>("op_2401_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2401_cast_fp16 = slice_by_index(begin = var_2401_begin_0, end = var_2401_end_0, end_mask = var_2401_end_mask_0, x = var_2156_cast_fp16)[name = tensor<string, []>("op_2401_cast_fp16")];
+            tensor<int32, [4]> var_2408_begin_0 = const()[name = tensor<string, []>("op_2408_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2408_end_0 = const()[name = tensor<string, []>("op_2408_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2408_end_mask_0 = const()[name = tensor<string, []>("op_2408_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2408_cast_fp16 = slice_by_index(begin = var_2408_begin_0, end = var_2408_end_0, end_mask = var_2408_end_mask_0, x = var_2156_cast_fp16)[name = tensor<string, []>("op_2408_cast_fp16")];
+            tensor<int32, [4]> var_2415_begin_0 = const()[name = tensor<string, []>("op_2415_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2415_end_0 = const()[name = tensor<string, []>("op_2415_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2415_end_mask_0 = const()[name = tensor<string, []>("op_2415_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2415_cast_fp16 = slice_by_index(begin = var_2415_begin_0, end = var_2415_end_0, end_mask = var_2415_end_mask_0, x = var_2156_cast_fp16)[name = tensor<string, []>("op_2415_cast_fp16")];
+            tensor<int32, [4]> var_2422_begin_0 = const()[name = tensor<string, []>("op_2422_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2422_end_0 = const()[name = tensor<string, []>("op_2422_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2422_end_mask_0 = const()[name = tensor<string, []>("op_2422_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2422_cast_fp16 = slice_by_index(begin = var_2422_begin_0, end = var_2422_end_0, end_mask = var_2422_end_mask_0, x = var_2156_cast_fp16)[name = tensor<string, []>("op_2422_cast_fp16")];
+            tensor<int32, [4]> var_2429_begin_0 = const()[name = tensor<string, []>("op_2429_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2429_end_0 = const()[name = tensor<string, []>("op_2429_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2429_end_mask_0 = const()[name = tensor<string, []>("op_2429_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2429_cast_fp16 = slice_by_index(begin = var_2429_begin_0, end = var_2429_end_0, end_mask = var_2429_end_mask_0, x = var_2160_cast_fp16)[name = tensor<string, []>("op_2429_cast_fp16")];
+            tensor<int32, [4]> var_2436_begin_0 = const()[name = tensor<string, []>("op_2436_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2436_end_0 = const()[name = tensor<string, []>("op_2436_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2436_end_mask_0 = const()[name = tensor<string, []>("op_2436_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2436_cast_fp16 = slice_by_index(begin = var_2436_begin_0, end = var_2436_end_0, end_mask = var_2436_end_mask_0, x = var_2160_cast_fp16)[name = tensor<string, []>("op_2436_cast_fp16")];
+            tensor<int32, [4]> var_2443_begin_0 = const()[name = tensor<string, []>("op_2443_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2443_end_0 = const()[name = tensor<string, []>("op_2443_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2443_end_mask_0 = const()[name = tensor<string, []>("op_2443_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2443_cast_fp16 = slice_by_index(begin = var_2443_begin_0, end = var_2443_end_0, end_mask = var_2443_end_mask_0, x = var_2160_cast_fp16)[name = tensor<string, []>("op_2443_cast_fp16")];
+            tensor<int32, [4]> var_2450_begin_0 = const()[name = tensor<string, []>("op_2450_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2450_end_0 = const()[name = tensor<string, []>("op_2450_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2450_end_mask_0 = const()[name = tensor<string, []>("op_2450_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2450_cast_fp16 = slice_by_index(begin = var_2450_begin_0, end = var_2450_end_0, end_mask = var_2450_end_mask_0, x = var_2160_cast_fp16)[name = tensor<string, []>("op_2450_cast_fp16")];
+            tensor<int32, [4]> var_2457_begin_0 = const()[name = tensor<string, []>("op_2457_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2457_end_0 = const()[name = tensor<string, []>("op_2457_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2457_end_mask_0 = const()[name = tensor<string, []>("op_2457_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2457_cast_fp16 = slice_by_index(begin = var_2457_begin_0, end = var_2457_end_0, end_mask = var_2457_end_mask_0, x = var_2164_cast_fp16)[name = tensor<string, []>("op_2457_cast_fp16")];
+            tensor<int32, [4]> var_2464_begin_0 = const()[name = tensor<string, []>("op_2464_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2464_end_0 = const()[name = tensor<string, []>("op_2464_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2464_end_mask_0 = const()[name = tensor<string, []>("op_2464_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2464_cast_fp16 = slice_by_index(begin = var_2464_begin_0, end = var_2464_end_0, end_mask = var_2464_end_mask_0, x = var_2164_cast_fp16)[name = tensor<string, []>("op_2464_cast_fp16")];
+            tensor<int32, [4]> var_2471_begin_0 = const()[name = tensor<string, []>("op_2471_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2471_end_0 = const()[name = tensor<string, []>("op_2471_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2471_end_mask_0 = const()[name = tensor<string, []>("op_2471_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2471_cast_fp16 = slice_by_index(begin = var_2471_begin_0, end = var_2471_end_0, end_mask = var_2471_end_mask_0, x = var_2164_cast_fp16)[name = tensor<string, []>("op_2471_cast_fp16")];
+            tensor<int32, [4]> var_2478_begin_0 = const()[name = tensor<string, []>("op_2478_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2478_end_0 = const()[name = tensor<string, []>("op_2478_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2478_end_mask_0 = const()[name = tensor<string, []>("op_2478_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2478_cast_fp16 = slice_by_index(begin = var_2478_begin_0, end = var_2478_end_0, end_mask = var_2478_end_mask_0, x = var_2164_cast_fp16)[name = tensor<string, []>("op_2478_cast_fp16")];
+            tensor<int32, [4]> var_2485_begin_0 = const()[name = tensor<string, []>("op_2485_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2485_end_0 = const()[name = tensor<string, []>("op_2485_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_2485_end_mask_0 = const()[name = tensor<string, []>("op_2485_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2485_cast_fp16 = slice_by_index(begin = var_2485_begin_0, end = var_2485_end_0, end_mask = var_2485_end_mask_0, x = var_2168_cast_fp16)[name = tensor<string, []>("op_2485_cast_fp16")];
+            tensor<int32, [4]> var_2492_begin_0 = const()[name = tensor<string, []>("op_2492_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_2492_end_0 = const()[name = tensor<string, []>("op_2492_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_2492_end_mask_0 = const()[name = tensor<string, []>("op_2492_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2492_cast_fp16 = slice_by_index(begin = var_2492_begin_0, end = var_2492_end_0, end_mask = var_2492_end_mask_0, x = var_2168_cast_fp16)[name = tensor<string, []>("op_2492_cast_fp16")];
+            tensor<int32, [4]> var_2499_begin_0 = const()[name = tensor<string, []>("op_2499_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_2499_end_0 = const()[name = tensor<string, []>("op_2499_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_2499_end_mask_0 = const()[name = tensor<string, []>("op_2499_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2499_cast_fp16 = slice_by_index(begin = var_2499_begin_0, end = var_2499_end_0, end_mask = var_2499_end_mask_0, x = var_2168_cast_fp16)[name = tensor<string, []>("op_2499_cast_fp16")];
+            tensor<int32, [4]> var_2506_begin_0 = const()[name = tensor<string, []>("op_2506_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_2506_end_0 = const()[name = tensor<string, []>("op_2506_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2506_end_mask_0 = const()[name = tensor<string, []>("op_2506_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_2506_cast_fp16 = slice_by_index(begin = var_2506_begin_0, end = var_2506_end_0, end_mask = var_2506_end_mask_0, x = var_2168_cast_fp16)[name = tensor<string, []>("op_2506_cast_fp16")];
+            tensor<int32, [4]> k_5_perm_0 = const()[name = tensor<string, []>("k_5_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_2511_begin_0 = const()[name = tensor<string, []>("op_2511_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2511_end_0 = const()[name = tensor<string, []>("op_2511_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_2511_end_mask_0 = const()[name = tensor<string, []>("op_2511_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_9 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2511_cast_fp16 = slice_by_index(begin = var_2511_begin_0, end = var_2511_end_0, end_mask = var_2511_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2511_cast_fp16")];
+            tensor<int32, [4]> var_2515_begin_0 = const()[name = tensor<string, []>("op_2515_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_2515_end_0 = const()[name = tensor<string, []>("op_2515_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_2515_end_mask_0 = const()[name = tensor<string, []>("op_2515_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2515_cast_fp16 = slice_by_index(begin = var_2515_begin_0, end = var_2515_end_0, end_mask = var_2515_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2515_cast_fp16")];
+            tensor<int32, [4]> var_2519_begin_0 = const()[name = tensor<string, []>("op_2519_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_2519_end_0 = const()[name = tensor<string, []>("op_2519_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_2519_end_mask_0 = const()[name = tensor<string, []>("op_2519_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2519_cast_fp16 = slice_by_index(begin = var_2519_begin_0, end = var_2519_end_0, end_mask = var_2519_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2519_cast_fp16")];
+            tensor<int32, [4]> var_2523_begin_0 = const()[name = tensor<string, []>("op_2523_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_2523_end_0 = const()[name = tensor<string, []>("op_2523_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_2523_end_mask_0 = const()[name = tensor<string, []>("op_2523_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2523_cast_fp16 = slice_by_index(begin = var_2523_begin_0, end = var_2523_end_0, end_mask = var_2523_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2523_cast_fp16")];
+            tensor<int32, [4]> var_2527_begin_0 = const()[name = tensor<string, []>("op_2527_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_2527_end_0 = const()[name = tensor<string, []>("op_2527_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_2527_end_mask_0 = const()[name = tensor<string, []>("op_2527_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2527_cast_fp16 = slice_by_index(begin = var_2527_begin_0, end = var_2527_end_0, end_mask = var_2527_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2527_cast_fp16")];
+            tensor<int32, [4]> var_2531_begin_0 = const()[name = tensor<string, []>("op_2531_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_2531_end_0 = const()[name = tensor<string, []>("op_2531_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_2531_end_mask_0 = const()[name = tensor<string, []>("op_2531_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2531_cast_fp16 = slice_by_index(begin = var_2531_begin_0, end = var_2531_end_0, end_mask = var_2531_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2531_cast_fp16")];
+            tensor<int32, [4]> var_2535_begin_0 = const()[name = tensor<string, []>("op_2535_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_2535_end_0 = const()[name = tensor<string, []>("op_2535_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_2535_end_mask_0 = const()[name = tensor<string, []>("op_2535_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2535_cast_fp16 = slice_by_index(begin = var_2535_begin_0, end = var_2535_end_0, end_mask = var_2535_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2535_cast_fp16")];
+            tensor<int32, [4]> var_2539_begin_0 = const()[name = tensor<string, []>("op_2539_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_2539_end_0 = const()[name = tensor<string, []>("op_2539_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_2539_end_mask_0 = const()[name = tensor<string, []>("op_2539_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2539_cast_fp16 = slice_by_index(begin = var_2539_begin_0, end = var_2539_end_0, end_mask = var_2539_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2539_cast_fp16")];
+            tensor<int32, [4]> var_2543_begin_0 = const()[name = tensor<string, []>("op_2543_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_2543_end_0 = const()[name = tensor<string, []>("op_2543_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_2543_end_mask_0 = const()[name = tensor<string, []>("op_2543_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2543_cast_fp16 = slice_by_index(begin = var_2543_begin_0, end = var_2543_end_0, end_mask = var_2543_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2543_cast_fp16")];
+            tensor<int32, [4]> var_2547_begin_0 = const()[name = tensor<string, []>("op_2547_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_2547_end_0 = const()[name = tensor<string, []>("op_2547_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_2547_end_mask_0 = const()[name = tensor<string, []>("op_2547_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2547_cast_fp16 = slice_by_index(begin = var_2547_begin_0, end = var_2547_end_0, end_mask = var_2547_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2547_cast_fp16")];
+            tensor<int32, [4]> var_2551_begin_0 = const()[name = tensor<string, []>("op_2551_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_2551_end_0 = const()[name = tensor<string, []>("op_2551_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_2551_end_mask_0 = const()[name = tensor<string, []>("op_2551_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2551_cast_fp16 = slice_by_index(begin = var_2551_begin_0, end = var_2551_end_0, end_mask = var_2551_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2551_cast_fp16")];
+            tensor<int32, [4]> var_2555_begin_0 = const()[name = tensor<string, []>("op_2555_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_2555_end_0 = const()[name = tensor<string, []>("op_2555_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_2555_end_mask_0 = const()[name = tensor<string, []>("op_2555_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_2555_cast_fp16 = slice_by_index(begin = var_2555_begin_0, end = var_2555_end_0, end_mask = var_2555_end_mask_0, x = transpose_9)[name = tensor<string, []>("op_2555_cast_fp16")];
+            tensor<int32, [4]> var_2557_begin_0 = const()[name = tensor<string, []>("op_2557_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2557_end_0 = const()[name = tensor<string, []>("op_2557_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2557_end_mask_0 = const()[name = tensor<string, []>("op_2557_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = var_2557_end_0, end_mask = var_2557_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2557_cast_fp16")];
+            tensor<int32, [4]> var_2561_begin_0 = const()[name = tensor<string, []>("op_2561_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_2561_end_0 = const()[name = tensor<string, []>("op_2561_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_2561_end_mask_0 = const()[name = tensor<string, []>("op_2561_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2561_cast_fp16 = slice_by_index(begin = var_2561_begin_0, end = var_2561_end_0, end_mask = var_2561_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2561_cast_fp16")];
+            tensor<int32, [4]> var_2565_begin_0 = const()[name = tensor<string, []>("op_2565_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_2565_end_0 = const()[name = tensor<string, []>("op_2565_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_2565_end_mask_0 = const()[name = tensor<string, []>("op_2565_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16 = slice_by_index(begin = var_2565_begin_0, end = var_2565_end_0, end_mask = var_2565_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2565_cast_fp16")];
+            tensor<int32, [4]> var_2569_begin_0 = const()[name = tensor<string, []>("op_2569_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_2569_end_0 = const()[name = tensor<string, []>("op_2569_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_2569_end_mask_0 = const()[name = tensor<string, []>("op_2569_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2569_cast_fp16 = slice_by_index(begin = var_2569_begin_0, end = var_2569_end_0, end_mask = var_2569_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2569_cast_fp16")];
+            tensor<int32, [4]> var_2573_begin_0 = const()[name = tensor<string, []>("op_2573_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_2573_end_0 = const()[name = tensor<string, []>("op_2573_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_2573_end_mask_0 = const()[name = tensor<string, []>("op_2573_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2573_cast_fp16 = slice_by_index(begin = var_2573_begin_0, end = var_2573_end_0, end_mask = var_2573_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2573_cast_fp16")];
+            tensor<int32, [4]> var_2577_begin_0 = const()[name = tensor<string, []>("op_2577_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_2577_end_0 = const()[name = tensor<string, []>("op_2577_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_2577_end_mask_0 = const()[name = tensor<string, []>("op_2577_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2577_cast_fp16 = slice_by_index(begin = var_2577_begin_0, end = var_2577_end_0, end_mask = var_2577_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2577_cast_fp16")];
+            tensor<int32, [4]> var_2581_begin_0 = const()[name = tensor<string, []>("op_2581_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_2581_end_0 = const()[name = tensor<string, []>("op_2581_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_2581_end_mask_0 = const()[name = tensor<string, []>("op_2581_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2581_cast_fp16 = slice_by_index(begin = var_2581_begin_0, end = var_2581_end_0, end_mask = var_2581_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2581_cast_fp16")];
+            tensor<int32, [4]> var_2585_begin_0 = const()[name = tensor<string, []>("op_2585_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_2585_end_0 = const()[name = tensor<string, []>("op_2585_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_2585_end_mask_0 = const()[name = tensor<string, []>("op_2585_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2585_cast_fp16 = slice_by_index(begin = var_2585_begin_0, end = var_2585_end_0, end_mask = var_2585_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2585_cast_fp16")];
+            tensor<int32, [4]> var_2589_begin_0 = const()[name = tensor<string, []>("op_2589_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_2589_end_0 = const()[name = tensor<string, []>("op_2589_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_2589_end_mask_0 = const()[name = tensor<string, []>("op_2589_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2589_cast_fp16 = slice_by_index(begin = var_2589_begin_0, end = var_2589_end_0, end_mask = var_2589_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2589_cast_fp16")];
+            tensor<int32, [4]> var_2593_begin_0 = const()[name = tensor<string, []>("op_2593_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_2593_end_0 = const()[name = tensor<string, []>("op_2593_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_2593_end_mask_0 = const()[name = tensor<string, []>("op_2593_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2593_cast_fp16 = slice_by_index(begin = var_2593_begin_0, end = var_2593_end_0, end_mask = var_2593_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2593_cast_fp16")];
+            tensor<int32, [4]> var_2597_begin_0 = const()[name = tensor<string, []>("op_2597_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_2597_end_0 = const()[name = tensor<string, []>("op_2597_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_2597_end_mask_0 = const()[name = tensor<string, []>("op_2597_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16 = slice_by_index(begin = var_2597_begin_0, end = var_2597_end_0, end_mask = var_2597_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2597_cast_fp16")];
+            tensor<int32, [4]> var_2601_begin_0 = const()[name = tensor<string, []>("op_2601_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_2601_end_0 = const()[name = tensor<string, []>("op_2601_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_2601_end_mask_0 = const()[name = tensor<string, []>("op_2601_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2601_cast_fp16 = slice_by_index(begin = var_2601_begin_0, end = var_2601_end_0, end_mask = var_2601_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_2601_cast_fp16")];
+            tensor<string, []> var_2605_equation_0 = const()[name = tensor<string, []>("op_2605_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2605_cast_fp16 = einsum(equation = var_2605_equation_0, values = (var_2511_cast_fp16, var_2177_cast_fp16))[name = tensor<string, []>("op_2605_cast_fp16")];
+            tensor<fp16, []> var_2606_to_fp16 = const()[name = tensor<string, []>("op_2606_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_193_cast_fp16 = mul(x = var_2605_cast_fp16, y = var_2606_to_fp16)[name = tensor<string, []>("aw_chunk_193_cast_fp16")];
+            tensor<string, []> var_2609_equation_0 = const()[name = tensor<string, []>("op_2609_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2609_cast_fp16 = einsum(equation = var_2609_equation_0, values = (var_2511_cast_fp16, var_2184_cast_fp16))[name = tensor<string, []>("op_2609_cast_fp16")];
+            tensor<fp16, []> var_2610_to_fp16 = const()[name = tensor<string, []>("op_2610_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_195_cast_fp16 = mul(x = var_2609_cast_fp16, y = var_2610_to_fp16)[name = tensor<string, []>("aw_chunk_195_cast_fp16")];
+            tensor<string, []> var_2613_equation_0 = const()[name = tensor<string, []>("op_2613_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2613_cast_fp16 = einsum(equation = var_2613_equation_0, values = (var_2511_cast_fp16, var_2191_cast_fp16))[name = tensor<string, []>("op_2613_cast_fp16")];
+            tensor<fp16, []> var_2614_to_fp16 = const()[name = tensor<string, []>("op_2614_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_197_cast_fp16 = mul(x = var_2613_cast_fp16, y = var_2614_to_fp16)[name = tensor<string, []>("aw_chunk_197_cast_fp16")];
+            tensor<string, []> var_2617_equation_0 = const()[name = tensor<string, []>("op_2617_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2617_cast_fp16 = einsum(equation = var_2617_equation_0, values = (var_2511_cast_fp16, var_2198_cast_fp16))[name = tensor<string, []>("op_2617_cast_fp16")];
+            tensor<fp16, []> var_2618_to_fp16 = const()[name = tensor<string, []>("op_2618_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_199_cast_fp16 = mul(x = var_2617_cast_fp16, y = var_2618_to_fp16)[name = tensor<string, []>("aw_chunk_199_cast_fp16")];
+            tensor<string, []> var_2621_equation_0 = const()[name = tensor<string, []>("op_2621_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2621_cast_fp16 = einsum(equation = var_2621_equation_0, values = (var_2515_cast_fp16, var_2205_cast_fp16))[name = tensor<string, []>("op_2621_cast_fp16")];
+            tensor<fp16, []> var_2622_to_fp16 = const()[name = tensor<string, []>("op_2622_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_201_cast_fp16 = mul(x = var_2621_cast_fp16, y = var_2622_to_fp16)[name = tensor<string, []>("aw_chunk_201_cast_fp16")];
+            tensor<string, []> var_2625_equation_0 = const()[name = tensor<string, []>("op_2625_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2625_cast_fp16 = einsum(equation = var_2625_equation_0, values = (var_2515_cast_fp16, var_2212_cast_fp16))[name = tensor<string, []>("op_2625_cast_fp16")];
+            tensor<fp16, []> var_2626_to_fp16 = const()[name = tensor<string, []>("op_2626_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_203_cast_fp16 = mul(x = var_2625_cast_fp16, y = var_2626_to_fp16)[name = tensor<string, []>("aw_chunk_203_cast_fp16")];
+            tensor<string, []> var_2629_equation_0 = const()[name = tensor<string, []>("op_2629_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2629_cast_fp16 = einsum(equation = var_2629_equation_0, values = (var_2515_cast_fp16, var_2219_cast_fp16))[name = tensor<string, []>("op_2629_cast_fp16")];
+            tensor<fp16, []> var_2630_to_fp16 = const()[name = tensor<string, []>("op_2630_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_205_cast_fp16 = mul(x = var_2629_cast_fp16, y = var_2630_to_fp16)[name = tensor<string, []>("aw_chunk_205_cast_fp16")];
+            tensor<string, []> var_2633_equation_0 = const()[name = tensor<string, []>("op_2633_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2633_cast_fp16 = einsum(equation = var_2633_equation_0, values = (var_2515_cast_fp16, var_2226_cast_fp16))[name = tensor<string, []>("op_2633_cast_fp16")];
+            tensor<fp16, []> var_2634_to_fp16 = const()[name = tensor<string, []>("op_2634_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_207_cast_fp16 = mul(x = var_2633_cast_fp16, y = var_2634_to_fp16)[name = tensor<string, []>("aw_chunk_207_cast_fp16")];
+            tensor<string, []> var_2637_equation_0 = const()[name = tensor<string, []>("op_2637_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2637_cast_fp16 = einsum(equation = var_2637_equation_0, values = (var_2519_cast_fp16, var_2233_cast_fp16))[name = tensor<string, []>("op_2637_cast_fp16")];
+            tensor<fp16, []> var_2638_to_fp16 = const()[name = tensor<string, []>("op_2638_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_209_cast_fp16 = mul(x = var_2637_cast_fp16, y = var_2638_to_fp16)[name = tensor<string, []>("aw_chunk_209_cast_fp16")];
+            tensor<string, []> var_2641_equation_0 = const()[name = tensor<string, []>("op_2641_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2641_cast_fp16 = einsum(equation = var_2641_equation_0, values = (var_2519_cast_fp16, var_2240_cast_fp16))[name = tensor<string, []>("op_2641_cast_fp16")];
+            tensor<fp16, []> var_2642_to_fp16 = const()[name = tensor<string, []>("op_2642_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_211_cast_fp16 = mul(x = var_2641_cast_fp16, y = var_2642_to_fp16)[name = tensor<string, []>("aw_chunk_211_cast_fp16")];
+            tensor<string, []> var_2645_equation_0 = const()[name = tensor<string, []>("op_2645_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2645_cast_fp16 = einsum(equation = var_2645_equation_0, values = (var_2519_cast_fp16, var_2247_cast_fp16))[name = tensor<string, []>("op_2645_cast_fp16")];
+            tensor<fp16, []> var_2646_to_fp16 = const()[name = tensor<string, []>("op_2646_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_213_cast_fp16 = mul(x = var_2645_cast_fp16, y = var_2646_to_fp16)[name = tensor<string, []>("aw_chunk_213_cast_fp16")];
+            tensor<string, []> var_2649_equation_0 = const()[name = tensor<string, []>("op_2649_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2649_cast_fp16 = einsum(equation = var_2649_equation_0, values = (var_2519_cast_fp16, var_2254_cast_fp16))[name = tensor<string, []>("op_2649_cast_fp16")];
+            tensor<fp16, []> var_2650_to_fp16 = const()[name = tensor<string, []>("op_2650_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_215_cast_fp16 = mul(x = var_2649_cast_fp16, y = var_2650_to_fp16)[name = tensor<string, []>("aw_chunk_215_cast_fp16")];
+            tensor<string, []> var_2653_equation_0 = const()[name = tensor<string, []>("op_2653_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2653_cast_fp16 = einsum(equation = var_2653_equation_0, values = (var_2523_cast_fp16, var_2261_cast_fp16))[name = tensor<string, []>("op_2653_cast_fp16")];
+            tensor<fp16, []> var_2654_to_fp16 = const()[name = tensor<string, []>("op_2654_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_217_cast_fp16 = mul(x = var_2653_cast_fp16, y = var_2654_to_fp16)[name = tensor<string, []>("aw_chunk_217_cast_fp16")];
+            tensor<string, []> var_2657_equation_0 = const()[name = tensor<string, []>("op_2657_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2657_cast_fp16 = einsum(equation = var_2657_equation_0, values = (var_2523_cast_fp16, var_2268_cast_fp16))[name = tensor<string, []>("op_2657_cast_fp16")];
+            tensor<fp16, []> var_2658_to_fp16 = const()[name = tensor<string, []>("op_2658_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_219_cast_fp16 = mul(x = var_2657_cast_fp16, y = var_2658_to_fp16)[name = tensor<string, []>("aw_chunk_219_cast_fp16")];
+            tensor<string, []> var_2661_equation_0 = const()[name = tensor<string, []>("op_2661_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2661_cast_fp16 = einsum(equation = var_2661_equation_0, values = (var_2523_cast_fp16, var_2275_cast_fp16))[name = tensor<string, []>("op_2661_cast_fp16")];
+            tensor<fp16, []> var_2662_to_fp16 = const()[name = tensor<string, []>("op_2662_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_221_cast_fp16 = mul(x = var_2661_cast_fp16, y = var_2662_to_fp16)[name = tensor<string, []>("aw_chunk_221_cast_fp16")];
+            tensor<string, []> var_2665_equation_0 = const()[name = tensor<string, []>("op_2665_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2665_cast_fp16 = einsum(equation = var_2665_equation_0, values = (var_2523_cast_fp16, var_2282_cast_fp16))[name = tensor<string, []>("op_2665_cast_fp16")];
+            tensor<fp16, []> var_2666_to_fp16 = const()[name = tensor<string, []>("op_2666_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_223_cast_fp16 = mul(x = var_2665_cast_fp16, y = var_2666_to_fp16)[name = tensor<string, []>("aw_chunk_223_cast_fp16")];
+            tensor<string, []> var_2669_equation_0 = const()[name = tensor<string, []>("op_2669_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2669_cast_fp16 = einsum(equation = var_2669_equation_0, values = (var_2527_cast_fp16, var_2289_cast_fp16))[name = tensor<string, []>("op_2669_cast_fp16")];
+            tensor<fp16, []> var_2670_to_fp16 = const()[name = tensor<string, []>("op_2670_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_225_cast_fp16 = mul(x = var_2669_cast_fp16, y = var_2670_to_fp16)[name = tensor<string, []>("aw_chunk_225_cast_fp16")];
+            tensor<string, []> var_2673_equation_0 = const()[name = tensor<string, []>("op_2673_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2673_cast_fp16 = einsum(equation = var_2673_equation_0, values = (var_2527_cast_fp16, var_2296_cast_fp16))[name = tensor<string, []>("op_2673_cast_fp16")];
+            tensor<fp16, []> var_2674_to_fp16 = const()[name = tensor<string, []>("op_2674_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_227_cast_fp16 = mul(x = var_2673_cast_fp16, y = var_2674_to_fp16)[name = tensor<string, []>("aw_chunk_227_cast_fp16")];
+            tensor<string, []> var_2677_equation_0 = const()[name = tensor<string, []>("op_2677_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2677_cast_fp16 = einsum(equation = var_2677_equation_0, values = (var_2527_cast_fp16, var_2303_cast_fp16))[name = tensor<string, []>("op_2677_cast_fp16")];
+            tensor<fp16, []> var_2678_to_fp16 = const()[name = tensor<string, []>("op_2678_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_229_cast_fp16 = mul(x = var_2677_cast_fp16, y = var_2678_to_fp16)[name = tensor<string, []>("aw_chunk_229_cast_fp16")];
+            tensor<string, []> var_2681_equation_0 = const()[name = tensor<string, []>("op_2681_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2681_cast_fp16 = einsum(equation = var_2681_equation_0, values = (var_2527_cast_fp16, var_2310_cast_fp16))[name = tensor<string, []>("op_2681_cast_fp16")];
+            tensor<fp16, []> var_2682_to_fp16 = const()[name = tensor<string, []>("op_2682_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_231_cast_fp16 = mul(x = var_2681_cast_fp16, y = var_2682_to_fp16)[name = tensor<string, []>("aw_chunk_231_cast_fp16")];
+            tensor<string, []> var_2685_equation_0 = const()[name = tensor<string, []>("op_2685_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2685_cast_fp16 = einsum(equation = var_2685_equation_0, values = (var_2531_cast_fp16, var_2317_cast_fp16))[name = tensor<string, []>("op_2685_cast_fp16")];
+            tensor<fp16, []> var_2686_to_fp16 = const()[name = tensor<string, []>("op_2686_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_233_cast_fp16 = mul(x = var_2685_cast_fp16, y = var_2686_to_fp16)[name = tensor<string, []>("aw_chunk_233_cast_fp16")];
+            tensor<string, []> var_2689_equation_0 = const()[name = tensor<string, []>("op_2689_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2689_cast_fp16 = einsum(equation = var_2689_equation_0, values = (var_2531_cast_fp16, var_2324_cast_fp16))[name = tensor<string, []>("op_2689_cast_fp16")];
+            tensor<fp16, []> var_2690_to_fp16 = const()[name = tensor<string, []>("op_2690_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_235_cast_fp16 = mul(x = var_2689_cast_fp16, y = var_2690_to_fp16)[name = tensor<string, []>("aw_chunk_235_cast_fp16")];
+            tensor<string, []> var_2693_equation_0 = const()[name = tensor<string, []>("op_2693_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2693_cast_fp16 = einsum(equation = var_2693_equation_0, values = (var_2531_cast_fp16, var_2331_cast_fp16))[name = tensor<string, []>("op_2693_cast_fp16")];
+            tensor<fp16, []> var_2694_to_fp16 = const()[name = tensor<string, []>("op_2694_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_237_cast_fp16 = mul(x = var_2693_cast_fp16, y = var_2694_to_fp16)[name = tensor<string, []>("aw_chunk_237_cast_fp16")];
+            tensor<string, []> var_2697_equation_0 = const()[name = tensor<string, []>("op_2697_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2697_cast_fp16 = einsum(equation = var_2697_equation_0, values = (var_2531_cast_fp16, var_2338_cast_fp16))[name = tensor<string, []>("op_2697_cast_fp16")];
+            tensor<fp16, []> var_2698_to_fp16 = const()[name = tensor<string, []>("op_2698_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_239_cast_fp16 = mul(x = var_2697_cast_fp16, y = var_2698_to_fp16)[name = tensor<string, []>("aw_chunk_239_cast_fp16")];
+            tensor<string, []> var_2701_equation_0 = const()[name = tensor<string, []>("op_2701_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2701_cast_fp16 = einsum(equation = var_2701_equation_0, values = (var_2535_cast_fp16, var_2345_cast_fp16))[name = tensor<string, []>("op_2701_cast_fp16")];
+            tensor<fp16, []> var_2702_to_fp16 = const()[name = tensor<string, []>("op_2702_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_241_cast_fp16 = mul(x = var_2701_cast_fp16, y = var_2702_to_fp16)[name = tensor<string, []>("aw_chunk_241_cast_fp16")];
+            tensor<string, []> var_2705_equation_0 = const()[name = tensor<string, []>("op_2705_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2705_cast_fp16 = einsum(equation = var_2705_equation_0, values = (var_2535_cast_fp16, var_2352_cast_fp16))[name = tensor<string, []>("op_2705_cast_fp16")];
+            tensor<fp16, []> var_2706_to_fp16 = const()[name = tensor<string, []>("op_2706_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_243_cast_fp16 = mul(x = var_2705_cast_fp16, y = var_2706_to_fp16)[name = tensor<string, []>("aw_chunk_243_cast_fp16")];
+            tensor<string, []> var_2709_equation_0 = const()[name = tensor<string, []>("op_2709_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2709_cast_fp16 = einsum(equation = var_2709_equation_0, values = (var_2535_cast_fp16, var_2359_cast_fp16))[name = tensor<string, []>("op_2709_cast_fp16")];
+            tensor<fp16, []> var_2710_to_fp16 = const()[name = tensor<string, []>("op_2710_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_245_cast_fp16 = mul(x = var_2709_cast_fp16, y = var_2710_to_fp16)[name = tensor<string, []>("aw_chunk_245_cast_fp16")];
+            tensor<string, []> var_2713_equation_0 = const()[name = tensor<string, []>("op_2713_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2713_cast_fp16 = einsum(equation = var_2713_equation_0, values = (var_2535_cast_fp16, var_2366_cast_fp16))[name = tensor<string, []>("op_2713_cast_fp16")];
+            tensor<fp16, []> var_2714_to_fp16 = const()[name = tensor<string, []>("op_2714_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_247_cast_fp16 = mul(x = var_2713_cast_fp16, y = var_2714_to_fp16)[name = tensor<string, []>("aw_chunk_247_cast_fp16")];
+            tensor<string, []> var_2717_equation_0 = const()[name = tensor<string, []>("op_2717_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2717_cast_fp16 = einsum(equation = var_2717_equation_0, values = (var_2539_cast_fp16, var_2373_cast_fp16))[name = tensor<string, []>("op_2717_cast_fp16")];
+            tensor<fp16, []> var_2718_to_fp16 = const()[name = tensor<string, []>("op_2718_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_249_cast_fp16 = mul(x = var_2717_cast_fp16, y = var_2718_to_fp16)[name = tensor<string, []>("aw_chunk_249_cast_fp16")];
+            tensor<string, []> var_2721_equation_0 = const()[name = tensor<string, []>("op_2721_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2721_cast_fp16 = einsum(equation = var_2721_equation_0, values = (var_2539_cast_fp16, var_2380_cast_fp16))[name = tensor<string, []>("op_2721_cast_fp16")];
+            tensor<fp16, []> var_2722_to_fp16 = const()[name = tensor<string, []>("op_2722_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_251_cast_fp16 = mul(x = var_2721_cast_fp16, y = var_2722_to_fp16)[name = tensor<string, []>("aw_chunk_251_cast_fp16")];
+            tensor<string, []> var_2725_equation_0 = const()[name = tensor<string, []>("op_2725_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2725_cast_fp16 = einsum(equation = var_2725_equation_0, values = (var_2539_cast_fp16, var_2387_cast_fp16))[name = tensor<string, []>("op_2725_cast_fp16")];
+            tensor<fp16, []> var_2726_to_fp16 = const()[name = tensor<string, []>("op_2726_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_253_cast_fp16 = mul(x = var_2725_cast_fp16, y = var_2726_to_fp16)[name = tensor<string, []>("aw_chunk_253_cast_fp16")];
+            tensor<string, []> var_2729_equation_0 = const()[name = tensor<string, []>("op_2729_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2729_cast_fp16 = einsum(equation = var_2729_equation_0, values = (var_2539_cast_fp16, var_2394_cast_fp16))[name = tensor<string, []>("op_2729_cast_fp16")];
+            tensor<fp16, []> var_2730_to_fp16 = const()[name = tensor<string, []>("op_2730_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_255_cast_fp16 = mul(x = var_2729_cast_fp16, y = var_2730_to_fp16)[name = tensor<string, []>("aw_chunk_255_cast_fp16")];
+            tensor<string, []> var_2733_equation_0 = const()[name = tensor<string, []>("op_2733_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2733_cast_fp16 = einsum(equation = var_2733_equation_0, values = (var_2543_cast_fp16, var_2401_cast_fp16))[name = tensor<string, []>("op_2733_cast_fp16")];
+            tensor<fp16, []> var_2734_to_fp16 = const()[name = tensor<string, []>("op_2734_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_257_cast_fp16 = mul(x = var_2733_cast_fp16, y = var_2734_to_fp16)[name = tensor<string, []>("aw_chunk_257_cast_fp16")];
+            tensor<string, []> var_2737_equation_0 = const()[name = tensor<string, []>("op_2737_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2737_cast_fp16 = einsum(equation = var_2737_equation_0, values = (var_2543_cast_fp16, var_2408_cast_fp16))[name = tensor<string, []>("op_2737_cast_fp16")];
+            tensor<fp16, []> var_2738_to_fp16 = const()[name = tensor<string, []>("op_2738_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_259_cast_fp16 = mul(x = var_2737_cast_fp16, y = var_2738_to_fp16)[name = tensor<string, []>("aw_chunk_259_cast_fp16")];
+            tensor<string, []> var_2741_equation_0 = const()[name = tensor<string, []>("op_2741_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2741_cast_fp16 = einsum(equation = var_2741_equation_0, values = (var_2543_cast_fp16, var_2415_cast_fp16))[name = tensor<string, []>("op_2741_cast_fp16")];
+            tensor<fp16, []> var_2742_to_fp16 = const()[name = tensor<string, []>("op_2742_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_261_cast_fp16 = mul(x = var_2741_cast_fp16, y = var_2742_to_fp16)[name = tensor<string, []>("aw_chunk_261_cast_fp16")];
+            tensor<string, []> var_2745_equation_0 = const()[name = tensor<string, []>("op_2745_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2745_cast_fp16 = einsum(equation = var_2745_equation_0, values = (var_2543_cast_fp16, var_2422_cast_fp16))[name = tensor<string, []>("op_2745_cast_fp16")];
+            tensor<fp16, []> var_2746_to_fp16 = const()[name = tensor<string, []>("op_2746_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_263_cast_fp16 = mul(x = var_2745_cast_fp16, y = var_2746_to_fp16)[name = tensor<string, []>("aw_chunk_263_cast_fp16")];
+            tensor<string, []> var_2749_equation_0 = const()[name = tensor<string, []>("op_2749_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2749_cast_fp16 = einsum(equation = var_2749_equation_0, values = (var_2547_cast_fp16, var_2429_cast_fp16))[name = tensor<string, []>("op_2749_cast_fp16")];
+            tensor<fp16, []> var_2750_to_fp16 = const()[name = tensor<string, []>("op_2750_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_265_cast_fp16 = mul(x = var_2749_cast_fp16, y = var_2750_to_fp16)[name = tensor<string, []>("aw_chunk_265_cast_fp16")];
+            tensor<string, []> var_2753_equation_0 = const()[name = tensor<string, []>("op_2753_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2753_cast_fp16 = einsum(equation = var_2753_equation_0, values = (var_2547_cast_fp16, var_2436_cast_fp16))[name = tensor<string, []>("op_2753_cast_fp16")];
+            tensor<fp16, []> var_2754_to_fp16 = const()[name = tensor<string, []>("op_2754_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_267_cast_fp16 = mul(x = var_2753_cast_fp16, y = var_2754_to_fp16)[name = tensor<string, []>("aw_chunk_267_cast_fp16")];
+            tensor<string, []> var_2757_equation_0 = const()[name = tensor<string, []>("op_2757_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2757_cast_fp16 = einsum(equation = var_2757_equation_0, values = (var_2547_cast_fp16, var_2443_cast_fp16))[name = tensor<string, []>("op_2757_cast_fp16")];
+            tensor<fp16, []> var_2758_to_fp16 = const()[name = tensor<string, []>("op_2758_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_269_cast_fp16 = mul(x = var_2757_cast_fp16, y = var_2758_to_fp16)[name = tensor<string, []>("aw_chunk_269_cast_fp16")];
+            tensor<string, []> var_2761_equation_0 = const()[name = tensor<string, []>("op_2761_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2761_cast_fp16 = einsum(equation = var_2761_equation_0, values = (var_2547_cast_fp16, var_2450_cast_fp16))[name = tensor<string, []>("op_2761_cast_fp16")];
+            tensor<fp16, []> var_2762_to_fp16 = const()[name = tensor<string, []>("op_2762_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_271_cast_fp16 = mul(x = var_2761_cast_fp16, y = var_2762_to_fp16)[name = tensor<string, []>("aw_chunk_271_cast_fp16")];
+            tensor<string, []> var_2765_equation_0 = const()[name = tensor<string, []>("op_2765_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2765_cast_fp16 = einsum(equation = var_2765_equation_0, values = (var_2551_cast_fp16, var_2457_cast_fp16))[name = tensor<string, []>("op_2765_cast_fp16")];
+            tensor<fp16, []> var_2766_to_fp16 = const()[name = tensor<string, []>("op_2766_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_273_cast_fp16 = mul(x = var_2765_cast_fp16, y = var_2766_to_fp16)[name = tensor<string, []>("aw_chunk_273_cast_fp16")];
+            tensor<string, []> var_2769_equation_0 = const()[name = tensor<string, []>("op_2769_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2769_cast_fp16 = einsum(equation = var_2769_equation_0, values = (var_2551_cast_fp16, var_2464_cast_fp16))[name = tensor<string, []>("op_2769_cast_fp16")];
+            tensor<fp16, []> var_2770_to_fp16 = const()[name = tensor<string, []>("op_2770_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_275_cast_fp16 = mul(x = var_2769_cast_fp16, y = var_2770_to_fp16)[name = tensor<string, []>("aw_chunk_275_cast_fp16")];
+            tensor<string, []> var_2773_equation_0 = const()[name = tensor<string, []>("op_2773_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2773_cast_fp16 = einsum(equation = var_2773_equation_0, values = (var_2551_cast_fp16, var_2471_cast_fp16))[name = tensor<string, []>("op_2773_cast_fp16")];
+            tensor<fp16, []> var_2774_to_fp16 = const()[name = tensor<string, []>("op_2774_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_277_cast_fp16 = mul(x = var_2773_cast_fp16, y = var_2774_to_fp16)[name = tensor<string, []>("aw_chunk_277_cast_fp16")];
+            tensor<string, []> var_2777_equation_0 = const()[name = tensor<string, []>("op_2777_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2777_cast_fp16 = einsum(equation = var_2777_equation_0, values = (var_2551_cast_fp16, var_2478_cast_fp16))[name = tensor<string, []>("op_2777_cast_fp16")];
+            tensor<fp16, []> var_2778_to_fp16 = const()[name = tensor<string, []>("op_2778_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_279_cast_fp16 = mul(x = var_2777_cast_fp16, y = var_2778_to_fp16)[name = tensor<string, []>("aw_chunk_279_cast_fp16")];
+            tensor<string, []> var_2781_equation_0 = const()[name = tensor<string, []>("op_2781_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2781_cast_fp16 = einsum(equation = var_2781_equation_0, values = (var_2555_cast_fp16, var_2485_cast_fp16))[name = tensor<string, []>("op_2781_cast_fp16")];
+            tensor<fp16, []> var_2782_to_fp16 = const()[name = tensor<string, []>("op_2782_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_281_cast_fp16 = mul(x = var_2781_cast_fp16, y = var_2782_to_fp16)[name = tensor<string, []>("aw_chunk_281_cast_fp16")];
+            tensor<string, []> var_2785_equation_0 = const()[name = tensor<string, []>("op_2785_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2785_cast_fp16 = einsum(equation = var_2785_equation_0, values = (var_2555_cast_fp16, var_2492_cast_fp16))[name = tensor<string, []>("op_2785_cast_fp16")];
+            tensor<fp16, []> var_2786_to_fp16 = const()[name = tensor<string, []>("op_2786_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_283_cast_fp16 = mul(x = var_2785_cast_fp16, y = var_2786_to_fp16)[name = tensor<string, []>("aw_chunk_283_cast_fp16")];
+            tensor<string, []> var_2789_equation_0 = const()[name = tensor<string, []>("op_2789_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2789_cast_fp16 = einsum(equation = var_2789_equation_0, values = (var_2555_cast_fp16, var_2499_cast_fp16))[name = tensor<string, []>("op_2789_cast_fp16")];
+            tensor<fp16, []> var_2790_to_fp16 = const()[name = tensor<string, []>("op_2790_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_285_cast_fp16 = mul(x = var_2789_cast_fp16, y = var_2790_to_fp16)[name = tensor<string, []>("aw_chunk_285_cast_fp16")];
+            tensor<string, []> var_2793_equation_0 = const()[name = tensor<string, []>("op_2793_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2793_cast_fp16 = einsum(equation = var_2793_equation_0, values = (var_2555_cast_fp16, var_2506_cast_fp16))[name = tensor<string, []>("op_2793_cast_fp16")];
+            tensor<fp16, []> var_2794_to_fp16 = const()[name = tensor<string, []>("op_2794_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_287_cast_fp16 = mul(x = var_2793_cast_fp16, y = var_2794_to_fp16)[name = tensor<string, []>("aw_chunk_287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2796_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_193_cast_fp16)[name = tensor<string, []>("op_2796_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2797_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_195_cast_fp16)[name = tensor<string, []>("op_2797_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2798_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_197_cast_fp16)[name = tensor<string, []>("op_2798_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2799_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_199_cast_fp16)[name = tensor<string, []>("op_2799_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2800_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_201_cast_fp16)[name = tensor<string, []>("op_2800_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2801_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_203_cast_fp16)[name = tensor<string, []>("op_2801_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2802_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_205_cast_fp16)[name = tensor<string, []>("op_2802_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2803_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_207_cast_fp16)[name = tensor<string, []>("op_2803_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2804_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_209_cast_fp16)[name = tensor<string, []>("op_2804_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2805_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_211_cast_fp16)[name = tensor<string, []>("op_2805_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2806_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_213_cast_fp16)[name = tensor<string, []>("op_2806_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2807_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_215_cast_fp16)[name = tensor<string, []>("op_2807_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2808_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_217_cast_fp16)[name = tensor<string, []>("op_2808_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2809_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_219_cast_fp16)[name = tensor<string, []>("op_2809_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2810_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_221_cast_fp16)[name = tensor<string, []>("op_2810_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2811_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_223_cast_fp16)[name = tensor<string, []>("op_2811_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2812_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_225_cast_fp16)[name = tensor<string, []>("op_2812_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2813_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_227_cast_fp16)[name = tensor<string, []>("op_2813_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2814_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_229_cast_fp16)[name = tensor<string, []>("op_2814_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2815_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_231_cast_fp16)[name = tensor<string, []>("op_2815_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2816_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_233_cast_fp16)[name = tensor<string, []>("op_2816_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2817_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_235_cast_fp16)[name = tensor<string, []>("op_2817_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2818_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_237_cast_fp16)[name = tensor<string, []>("op_2818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2819_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_239_cast_fp16)[name = tensor<string, []>("op_2819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2820_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_241_cast_fp16)[name = tensor<string, []>("op_2820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2821_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_243_cast_fp16)[name = tensor<string, []>("op_2821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2822_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_245_cast_fp16)[name = tensor<string, []>("op_2822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2823_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_247_cast_fp16)[name = tensor<string, []>("op_2823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2824_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_249_cast_fp16)[name = tensor<string, []>("op_2824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2825_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_251_cast_fp16)[name = tensor<string, []>("op_2825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2826_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_253_cast_fp16)[name = tensor<string, []>("op_2826_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2827_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_255_cast_fp16)[name = tensor<string, []>("op_2827_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2828_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_257_cast_fp16)[name = tensor<string, []>("op_2828_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2829_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_259_cast_fp16)[name = tensor<string, []>("op_2829_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2830_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_261_cast_fp16)[name = tensor<string, []>("op_2830_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2831_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_263_cast_fp16)[name = tensor<string, []>("op_2831_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2832_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_265_cast_fp16)[name = tensor<string, []>("op_2832_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2833_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_267_cast_fp16)[name = tensor<string, []>("op_2833_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2834_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_269_cast_fp16)[name = tensor<string, []>("op_2834_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2835_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_271_cast_fp16)[name = tensor<string, []>("op_2835_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2836_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_273_cast_fp16)[name = tensor<string, []>("op_2836_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2837_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_275_cast_fp16)[name = tensor<string, []>("op_2837_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2838_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_277_cast_fp16)[name = tensor<string, []>("op_2838_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2839_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_279_cast_fp16)[name = tensor<string, []>("op_2839_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2840_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_281_cast_fp16)[name = tensor<string, []>("op_2840_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2841_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_283_cast_fp16)[name = tensor<string, []>("op_2841_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2842_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_285_cast_fp16)[name = tensor<string, []>("op_2842_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2843_cast_fp16 = softmax(axis = var_2069, x = aw_chunk_287_cast_fp16)[name = tensor<string, []>("op_2843_cast_fp16")];
+            tensor<string, []> var_2845_equation_0 = const()[name = tensor<string, []>("op_2845_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2845_cast_fp16 = einsum(equation = var_2845_equation_0, values = (var_2557_cast_fp16, var_2796_cast_fp16))[name = tensor<string, []>("op_2845_cast_fp16")];
+            tensor<string, []> var_2847_equation_0 = const()[name = tensor<string, []>("op_2847_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2847_cast_fp16 = einsum(equation = var_2847_equation_0, values = (var_2557_cast_fp16, var_2797_cast_fp16))[name = tensor<string, []>("op_2847_cast_fp16")];
+            tensor<string, []> var_2849_equation_0 = const()[name = tensor<string, []>("op_2849_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2849_cast_fp16 = einsum(equation = var_2849_equation_0, values = (var_2557_cast_fp16, var_2798_cast_fp16))[name = tensor<string, []>("op_2849_cast_fp16")];
+            tensor<string, []> var_2851_equation_0 = const()[name = tensor<string, []>("op_2851_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2851_cast_fp16 = einsum(equation = var_2851_equation_0, values = (var_2557_cast_fp16, var_2799_cast_fp16))[name = tensor<string, []>("op_2851_cast_fp16")];
+            tensor<string, []> var_2853_equation_0 = const()[name = tensor<string, []>("op_2853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2853_cast_fp16 = einsum(equation = var_2853_equation_0, values = (var_2561_cast_fp16, var_2800_cast_fp16))[name = tensor<string, []>("op_2853_cast_fp16")];
+            tensor<string, []> var_2855_equation_0 = const()[name = tensor<string, []>("op_2855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2855_cast_fp16 = einsum(equation = var_2855_equation_0, values = (var_2561_cast_fp16, var_2801_cast_fp16))[name = tensor<string, []>("op_2855_cast_fp16")];
+            tensor<string, []> var_2857_equation_0 = const()[name = tensor<string, []>("op_2857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2857_cast_fp16 = einsum(equation = var_2857_equation_0, values = (var_2561_cast_fp16, var_2802_cast_fp16))[name = tensor<string, []>("op_2857_cast_fp16")];
+            tensor<string, []> var_2859_equation_0 = const()[name = tensor<string, []>("op_2859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2859_cast_fp16 = einsum(equation = var_2859_equation_0, values = (var_2561_cast_fp16, var_2803_cast_fp16))[name = tensor<string, []>("op_2859_cast_fp16")];
+            tensor<string, []> var_2861_equation_0 = const()[name = tensor<string, []>("op_2861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2861_cast_fp16 = einsum(equation = var_2861_equation_0, values = (var_2565_cast_fp16, var_2804_cast_fp16))[name = tensor<string, []>("op_2861_cast_fp16")];
+            tensor<string, []> var_2863_equation_0 = const()[name = tensor<string, []>("op_2863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2863_cast_fp16 = einsum(equation = var_2863_equation_0, values = (var_2565_cast_fp16, var_2805_cast_fp16))[name = tensor<string, []>("op_2863_cast_fp16")];
+            tensor<string, []> var_2865_equation_0 = const()[name = tensor<string, []>("op_2865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2865_cast_fp16 = einsum(equation = var_2865_equation_0, values = (var_2565_cast_fp16, var_2806_cast_fp16))[name = tensor<string, []>("op_2865_cast_fp16")];
+            tensor<string, []> var_2867_equation_0 = const()[name = tensor<string, []>("op_2867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2867_cast_fp16 = einsum(equation = var_2867_equation_0, values = (var_2565_cast_fp16, var_2807_cast_fp16))[name = tensor<string, []>("op_2867_cast_fp16")];
+            tensor<string, []> var_2869_equation_0 = const()[name = tensor<string, []>("op_2869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2869_cast_fp16 = einsum(equation = var_2869_equation_0, values = (var_2569_cast_fp16, var_2808_cast_fp16))[name = tensor<string, []>("op_2869_cast_fp16")];
+            tensor<string, []> var_2871_equation_0 = const()[name = tensor<string, []>("op_2871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2871_cast_fp16 = einsum(equation = var_2871_equation_0, values = (var_2569_cast_fp16, var_2809_cast_fp16))[name = tensor<string, []>("op_2871_cast_fp16")];
+            tensor<string, []> var_2873_equation_0 = const()[name = tensor<string, []>("op_2873_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2873_cast_fp16 = einsum(equation = var_2873_equation_0, values = (var_2569_cast_fp16, var_2810_cast_fp16))[name = tensor<string, []>("op_2873_cast_fp16")];
+            tensor<string, []> var_2875_equation_0 = const()[name = tensor<string, []>("op_2875_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2875_cast_fp16 = einsum(equation = var_2875_equation_0, values = (var_2569_cast_fp16, var_2811_cast_fp16))[name = tensor<string, []>("op_2875_cast_fp16")];
+            tensor<string, []> var_2877_equation_0 = const()[name = tensor<string, []>("op_2877_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2877_cast_fp16 = einsum(equation = var_2877_equation_0, values = (var_2573_cast_fp16, var_2812_cast_fp16))[name = tensor<string, []>("op_2877_cast_fp16")];
+            tensor<string, []> var_2879_equation_0 = const()[name = tensor<string, []>("op_2879_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2879_cast_fp16 = einsum(equation = var_2879_equation_0, values = (var_2573_cast_fp16, var_2813_cast_fp16))[name = tensor<string, []>("op_2879_cast_fp16")];
+            tensor<string, []> var_2881_equation_0 = const()[name = tensor<string, []>("op_2881_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2881_cast_fp16 = einsum(equation = var_2881_equation_0, values = (var_2573_cast_fp16, var_2814_cast_fp16))[name = tensor<string, []>("op_2881_cast_fp16")];
+            tensor<string, []> var_2883_equation_0 = const()[name = tensor<string, []>("op_2883_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2883_cast_fp16 = einsum(equation = var_2883_equation_0, values = (var_2573_cast_fp16, var_2815_cast_fp16))[name = tensor<string, []>("op_2883_cast_fp16")];
+            tensor<string, []> var_2885_equation_0 = const()[name = tensor<string, []>("op_2885_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2885_cast_fp16 = einsum(equation = var_2885_equation_0, values = (var_2577_cast_fp16, var_2816_cast_fp16))[name = tensor<string, []>("op_2885_cast_fp16")];
+            tensor<string, []> var_2887_equation_0 = const()[name = tensor<string, []>("op_2887_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2887_cast_fp16 = einsum(equation = var_2887_equation_0, values = (var_2577_cast_fp16, var_2817_cast_fp16))[name = tensor<string, []>("op_2887_cast_fp16")];
+            tensor<string, []> var_2889_equation_0 = const()[name = tensor<string, []>("op_2889_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2889_cast_fp16 = einsum(equation = var_2889_equation_0, values = (var_2577_cast_fp16, var_2818_cast_fp16))[name = tensor<string, []>("op_2889_cast_fp16")];
+            tensor<string, []> var_2891_equation_0 = const()[name = tensor<string, []>("op_2891_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2891_cast_fp16 = einsum(equation = var_2891_equation_0, values = (var_2577_cast_fp16, var_2819_cast_fp16))[name = tensor<string, []>("op_2891_cast_fp16")];
+            tensor<string, []> var_2893_equation_0 = const()[name = tensor<string, []>("op_2893_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2893_cast_fp16 = einsum(equation = var_2893_equation_0, values = (var_2581_cast_fp16, var_2820_cast_fp16))[name = tensor<string, []>("op_2893_cast_fp16")];
+            tensor<string, []> var_2895_equation_0 = const()[name = tensor<string, []>("op_2895_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2895_cast_fp16 = einsum(equation = var_2895_equation_0, values = (var_2581_cast_fp16, var_2821_cast_fp16))[name = tensor<string, []>("op_2895_cast_fp16")];
+            tensor<string, []> var_2897_equation_0 = const()[name = tensor<string, []>("op_2897_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2897_cast_fp16 = einsum(equation = var_2897_equation_0, values = (var_2581_cast_fp16, var_2822_cast_fp16))[name = tensor<string, []>("op_2897_cast_fp16")];
+            tensor<string, []> var_2899_equation_0 = const()[name = tensor<string, []>("op_2899_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2899_cast_fp16 = einsum(equation = var_2899_equation_0, values = (var_2581_cast_fp16, var_2823_cast_fp16))[name = tensor<string, []>("op_2899_cast_fp16")];
+            tensor<string, []> var_2901_equation_0 = const()[name = tensor<string, []>("op_2901_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2901_cast_fp16 = einsum(equation = var_2901_equation_0, values = (var_2585_cast_fp16, var_2824_cast_fp16))[name = tensor<string, []>("op_2901_cast_fp16")];
+            tensor<string, []> var_2903_equation_0 = const()[name = tensor<string, []>("op_2903_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2903_cast_fp16 = einsum(equation = var_2903_equation_0, values = (var_2585_cast_fp16, var_2825_cast_fp16))[name = tensor<string, []>("op_2903_cast_fp16")];
+            tensor<string, []> var_2905_equation_0 = const()[name = tensor<string, []>("op_2905_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2905_cast_fp16 = einsum(equation = var_2905_equation_0, values = (var_2585_cast_fp16, var_2826_cast_fp16))[name = tensor<string, []>("op_2905_cast_fp16")];
+            tensor<string, []> var_2907_equation_0 = const()[name = tensor<string, []>("op_2907_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2907_cast_fp16 = einsum(equation = var_2907_equation_0, values = (var_2585_cast_fp16, var_2827_cast_fp16))[name = tensor<string, []>("op_2907_cast_fp16")];
+            tensor<string, []> var_2909_equation_0 = const()[name = tensor<string, []>("op_2909_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2909_cast_fp16 = einsum(equation = var_2909_equation_0, values = (var_2589_cast_fp16, var_2828_cast_fp16))[name = tensor<string, []>("op_2909_cast_fp16")];
+            tensor<string, []> var_2911_equation_0 = const()[name = tensor<string, []>("op_2911_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2911_cast_fp16 = einsum(equation = var_2911_equation_0, values = (var_2589_cast_fp16, var_2829_cast_fp16))[name = tensor<string, []>("op_2911_cast_fp16")];
+            tensor<string, []> var_2913_equation_0 = const()[name = tensor<string, []>("op_2913_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2913_cast_fp16 = einsum(equation = var_2913_equation_0, values = (var_2589_cast_fp16, var_2830_cast_fp16))[name = tensor<string, []>("op_2913_cast_fp16")];
+            tensor<string, []> var_2915_equation_0 = const()[name = tensor<string, []>("op_2915_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2915_cast_fp16 = einsum(equation = var_2915_equation_0, values = (var_2589_cast_fp16, var_2831_cast_fp16))[name = tensor<string, []>("op_2915_cast_fp16")];
+            tensor<string, []> var_2917_equation_0 = const()[name = tensor<string, []>("op_2917_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2917_cast_fp16 = einsum(equation = var_2917_equation_0, values = (var_2593_cast_fp16, var_2832_cast_fp16))[name = tensor<string, []>("op_2917_cast_fp16")];
+            tensor<string, []> var_2919_equation_0 = const()[name = tensor<string, []>("op_2919_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2919_cast_fp16 = einsum(equation = var_2919_equation_0, values = (var_2593_cast_fp16, var_2833_cast_fp16))[name = tensor<string, []>("op_2919_cast_fp16")];
+            tensor<string, []> var_2921_equation_0 = const()[name = tensor<string, []>("op_2921_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2921_cast_fp16 = einsum(equation = var_2921_equation_0, values = (var_2593_cast_fp16, var_2834_cast_fp16))[name = tensor<string, []>("op_2921_cast_fp16")];
+            tensor<string, []> var_2923_equation_0 = const()[name = tensor<string, []>("op_2923_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2923_cast_fp16 = einsum(equation = var_2923_equation_0, values = (var_2593_cast_fp16, var_2835_cast_fp16))[name = tensor<string, []>("op_2923_cast_fp16")];
+            tensor<string, []> var_2925_equation_0 = const()[name = tensor<string, []>("op_2925_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2925_cast_fp16 = einsum(equation = var_2925_equation_0, values = (var_2597_cast_fp16, var_2836_cast_fp16))[name = tensor<string, []>("op_2925_cast_fp16")];
+            tensor<string, []> var_2927_equation_0 = const()[name = tensor<string, []>("op_2927_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2927_cast_fp16 = einsum(equation = var_2927_equation_0, values = (var_2597_cast_fp16, var_2837_cast_fp16))[name = tensor<string, []>("op_2927_cast_fp16")];
+            tensor<string, []> var_2929_equation_0 = const()[name = tensor<string, []>("op_2929_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2929_cast_fp16 = einsum(equation = var_2929_equation_0, values = (var_2597_cast_fp16, var_2838_cast_fp16))[name = tensor<string, []>("op_2929_cast_fp16")];
+            tensor<string, []> var_2931_equation_0 = const()[name = tensor<string, []>("op_2931_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2931_cast_fp16 = einsum(equation = var_2931_equation_0, values = (var_2597_cast_fp16, var_2839_cast_fp16))[name = tensor<string, []>("op_2931_cast_fp16")];
+            tensor<string, []> var_2933_equation_0 = const()[name = tensor<string, []>("op_2933_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2933_cast_fp16 = einsum(equation = var_2933_equation_0, values = (var_2601_cast_fp16, var_2840_cast_fp16))[name = tensor<string, []>("op_2933_cast_fp16")];
+            tensor<string, []> var_2935_equation_0 = const()[name = tensor<string, []>("op_2935_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2935_cast_fp16 = einsum(equation = var_2935_equation_0, values = (var_2601_cast_fp16, var_2841_cast_fp16))[name = tensor<string, []>("op_2935_cast_fp16")];
+            tensor<string, []> var_2937_equation_0 = const()[name = tensor<string, []>("op_2937_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2937_cast_fp16 = einsum(equation = var_2937_equation_0, values = (var_2601_cast_fp16, var_2842_cast_fp16))[name = tensor<string, []>("op_2937_cast_fp16")];
+            tensor<string, []> var_2939_equation_0 = const()[name = tensor<string, []>("op_2939_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2939_cast_fp16 = einsum(equation = var_2939_equation_0, values = (var_2601_cast_fp16, var_2843_cast_fp16))[name = tensor<string, []>("op_2939_cast_fp16")];
+            tensor<bool, []> var_2941_interleave_0 = const()[name = tensor<string, []>("op_2941_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2941_cast_fp16 = concat(axis = var_2052, interleave = var_2941_interleave_0, values = (var_2845_cast_fp16, var_2847_cast_fp16, var_2849_cast_fp16, var_2851_cast_fp16))[name = tensor<string, []>("op_2941_cast_fp16")];
+            tensor<bool, []> var_2943_interleave_0 = const()[name = tensor<string, []>("op_2943_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2943_cast_fp16 = concat(axis = var_2052, interleave = var_2943_interleave_0, values = (var_2853_cast_fp16, var_2855_cast_fp16, var_2857_cast_fp16, var_2859_cast_fp16))[name = tensor<string, []>("op_2943_cast_fp16")];
+            tensor<bool, []> var_2945_interleave_0 = const()[name = tensor<string, []>("op_2945_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2945_cast_fp16 = concat(axis = var_2052, interleave = var_2945_interleave_0, values = (var_2861_cast_fp16, var_2863_cast_fp16, var_2865_cast_fp16, var_2867_cast_fp16))[name = tensor<string, []>("op_2945_cast_fp16")];
+            tensor<bool, []> var_2947_interleave_0 = const()[name = tensor<string, []>("op_2947_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2947_cast_fp16 = concat(axis = var_2052, interleave = var_2947_interleave_0, values = (var_2869_cast_fp16, var_2871_cast_fp16, var_2873_cast_fp16, var_2875_cast_fp16))[name = tensor<string, []>("op_2947_cast_fp16")];
+            tensor<bool, []> var_2949_interleave_0 = const()[name = tensor<string, []>("op_2949_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2949_cast_fp16 = concat(axis = var_2052, interleave = var_2949_interleave_0, values = (var_2877_cast_fp16, var_2879_cast_fp16, var_2881_cast_fp16, var_2883_cast_fp16))[name = tensor<string, []>("op_2949_cast_fp16")];
+            tensor<bool, []> var_2951_interleave_0 = const()[name = tensor<string, []>("op_2951_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2951_cast_fp16 = concat(axis = var_2052, interleave = var_2951_interleave_0, values = (var_2885_cast_fp16, var_2887_cast_fp16, var_2889_cast_fp16, var_2891_cast_fp16))[name = tensor<string, []>("op_2951_cast_fp16")];
+            tensor<bool, []> var_2953_interleave_0 = const()[name = tensor<string, []>("op_2953_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2953_cast_fp16 = concat(axis = var_2052, interleave = var_2953_interleave_0, values = (var_2893_cast_fp16, var_2895_cast_fp16, var_2897_cast_fp16, var_2899_cast_fp16))[name = tensor<string, []>("op_2953_cast_fp16")];
+            tensor<bool, []> var_2955_interleave_0 = const()[name = tensor<string, []>("op_2955_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2955_cast_fp16 = concat(axis = var_2052, interleave = var_2955_interleave_0, values = (var_2901_cast_fp16, var_2903_cast_fp16, var_2905_cast_fp16, var_2907_cast_fp16))[name = tensor<string, []>("op_2955_cast_fp16")];
+            tensor<bool, []> var_2957_interleave_0 = const()[name = tensor<string, []>("op_2957_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2957_cast_fp16 = concat(axis = var_2052, interleave = var_2957_interleave_0, values = (var_2909_cast_fp16, var_2911_cast_fp16, var_2913_cast_fp16, var_2915_cast_fp16))[name = tensor<string, []>("op_2957_cast_fp16")];
+            tensor<bool, []> var_2959_interleave_0 = const()[name = tensor<string, []>("op_2959_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2959_cast_fp16 = concat(axis = var_2052, interleave = var_2959_interleave_0, values = (var_2917_cast_fp16, var_2919_cast_fp16, var_2921_cast_fp16, var_2923_cast_fp16))[name = tensor<string, []>("op_2959_cast_fp16")];
+            tensor<bool, []> var_2961_interleave_0 = const()[name = tensor<string, []>("op_2961_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2961_cast_fp16 = concat(axis = var_2052, interleave = var_2961_interleave_0, values = (var_2925_cast_fp16, var_2927_cast_fp16, var_2929_cast_fp16, var_2931_cast_fp16))[name = tensor<string, []>("op_2961_cast_fp16")];
+            tensor<bool, []> var_2963_interleave_0 = const()[name = tensor<string, []>("op_2963_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2963_cast_fp16 = concat(axis = var_2052, interleave = var_2963_interleave_0, values = (var_2933_cast_fp16, var_2935_cast_fp16, var_2937_cast_fp16, var_2939_cast_fp16))[name = tensor<string, []>("op_2963_cast_fp16")];
+            tensor<bool, []> input_17_interleave_0 = const()[name = tensor<string, []>("input_17_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_17_cast_fp16 = concat(axis = var_2069, interleave = input_17_interleave_0, values = (var_2941_cast_fp16, var_2943_cast_fp16, var_2945_cast_fp16, var_2947_cast_fp16, var_2949_cast_fp16, var_2951_cast_fp16, var_2953_cast_fp16, var_2955_cast_fp16, var_2957_cast_fp16, var_2959_cast_fp16, var_2961_cast_fp16, var_2963_cast_fp16))[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<int32, [2]> var_2968 = const()[name = tensor<string, []>("op_2968"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2970 = const()[name = tensor<string, []>("op_2970"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_11_pad_type_0 = const()[name = tensor<string, []>("obj_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_11_pad_0 = const()[name = tensor<string, []>("obj_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38114112)))];
+            tensor<fp16, [768]> layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39293824)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_2970, groups = var_2069, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_2968, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("obj_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> var_2976 = const()[name = tensor<string, []>("op_2976"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_11_cast_fp16 = reduce_mean(axes = var_2976, keep_dims = var_2070, x = inputs_11_cast_fp16)[name = tensor<string, []>("channels_mean_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_sq_11_cast_fp16")];
+            tensor<int32, [1]> var_2980 = const()[name = tensor<string, []>("op_2980"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2981_cast_fp16 = reduce_mean(axes = var_2980, keep_dims = var_2070, x = zero_mean_sq_11_cast_fp16)[name = tensor<string, []>("op_2981_cast_fp16")];
+            tensor<fp16, []> var_2982_to_fp16 = const()[name = tensor<string, []>("op_2982_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_2983_cast_fp16 = add(x = var_2981_cast_fp16, y = var_2982_to_fp16)[name = tensor<string, []>("op_2983_cast_fp16")];
+            tensor<fp16, []> denom_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_2983_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
+            tensor<fp16, [768]> input_19_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_19_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39295424)))];
+            tensor<fp16, [768]> input_19_beta_0_to_fp16 = const()[name = tensor<string, []>("input_19_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39297024)))];
+            tensor<fp16, []> input_19_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_19_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<int32, [2]> var_2994 = const()[name = tensor<string, []>("op_2994"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2996 = const()[name = tensor<string, []>("op_2996"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_21_pad_type_0 = const()[name = tensor<string, []>("input_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_21_pad_0 = const()[name = tensor<string, []>("input_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39298624)))];
+            tensor<fp16, [3072]> layers_2_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44017280)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_2996, groups = var_2069, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = var_2994, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> input_23_mode_0 = const()[name = tensor<string, []>("input_23_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<int32, [2]> var_3002 = const()[name = tensor<string, []>("op_3002"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3004 = const()[name = tensor<string, []>("op_3004"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_9_pad_type_0 = const()[name = tensor<string, []>("hidden_states_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_9_pad_0 = const()[name = tensor<string, []>("hidden_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44023488)))];
+            tensor<fp16, [768]> layers_2_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48742144)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_3004, groups = var_2069, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_3002, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_3011 = const()[name = tensor<string, []>("op_3011"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_3028 = const()[name = tensor<string, []>("op_3028"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_3029 = const()[name = tensor<string, []>("op_3029"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_3039 = const()[name = tensor<string, []>("op_3039"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_13_cast_fp16 = reduce_mean(axes = var_3039, keep_dims = var_3029, x = inputs_13_cast_fp16)[name = tensor<string, []>("channels_mean_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor<string, []>("zero_mean_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor<string, []>("zero_mean_sq_13_cast_fp16")];
+            tensor<int32, [1]> var_3043 = const()[name = tensor<string, []>("op_3043"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_3044_cast_fp16 = reduce_mean(axes = var_3043, keep_dims = var_3029, x = zero_mean_sq_13_cast_fp16)[name = tensor<string, []>("op_3044_cast_fp16")];
+            tensor<fp16, []> var_3045_to_fp16 = const()[name = tensor<string, []>("op_3045_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_3046_cast_fp16 = add(x = var_3044_cast_fp16, y = var_3045_to_fp16)[name = tensor<string, []>("op_3046_cast_fp16")];
+            tensor<fp16, []> denom_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_3046_cast_fp16)[name = tensor<string, []>("denom_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor<string, []>("out_13_cast_fp16")];
+            tensor<fp16, [768]> obj_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48743744)))];
+            tensor<fp16, [768]> obj_13_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_13_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48745344)))];
+            tensor<fp16, []> obj_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
+            tensor<int32, [2]> var_3061 = const()[name = tensor<string, []>("op_3061"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3063 = const()[name = tensor<string, []>("op_3063"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_7_pad_type_0 = const()[name = tensor<string, []>("query_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_7_pad_0 = const()[name = tensor<string, []>("query_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48746944)))];
+            tensor<fp16, [768]> layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49926656)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_7_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_3063, groups = var_3028, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_3061, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
+            tensor<int32, [2]> var_3067 = const()[name = tensor<string, []>("op_3067"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3069 = const()[name = tensor<string, []>("op_3069"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_7_pad_type_0 = const()[name = tensor<string, []>("key_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_7_pad_0 = const()[name = tensor<string, []>("key_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49928256)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_7_cast_fp16 = conv(dilations = var_3069, groups = var_3028, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_3067, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("key_7_cast_fp16")];
+            tensor<int32, [2]> var_3074 = const()[name = tensor<string, []>("op_3074"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3076 = const()[name = tensor<string, []>("op_3076"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_7_pad_type_0 = const()[name = tensor<string, []>("value_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_7_pad_0 = const()[name = tensor<string, []>("value_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51107968)))];
+            tensor<fp16, [768]> layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52287680)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_3076, groups = var_3028, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_3074, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("value_7_cast_fp16")];
+            tensor<int32, [4]> var_3083_begin_0 = const()[name = tensor<string, []>("op_3083_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3083_end_0 = const()[name = tensor<string, []>("op_3083_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3083_end_mask_0 = const()[name = tensor<string, []>("op_3083_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3083_cast_fp16 = slice_by_index(begin = var_3083_begin_0, end = var_3083_end_0, end_mask = var_3083_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3083_cast_fp16")];
+            tensor<int32, [4]> var_3087_begin_0 = const()[name = tensor<string, []>("op_3087_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_3087_end_0 = const()[name = tensor<string, []>("op_3087_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_3087_end_mask_0 = const()[name = tensor<string, []>("op_3087_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3087_cast_fp16 = slice_by_index(begin = var_3087_begin_0, end = var_3087_end_0, end_mask = var_3087_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3087_cast_fp16")];
+            tensor<int32, [4]> var_3091_begin_0 = const()[name = tensor<string, []>("op_3091_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_3091_end_0 = const()[name = tensor<string, []>("op_3091_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_3091_end_mask_0 = const()[name = tensor<string, []>("op_3091_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3091_cast_fp16 = slice_by_index(begin = var_3091_begin_0, end = var_3091_end_0, end_mask = var_3091_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3091_cast_fp16")];
+            tensor<int32, [4]> var_3095_begin_0 = const()[name = tensor<string, []>("op_3095_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_3095_end_0 = const()[name = tensor<string, []>("op_3095_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_3095_end_mask_0 = const()[name = tensor<string, []>("op_3095_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3095_cast_fp16 = slice_by_index(begin = var_3095_begin_0, end = var_3095_end_0, end_mask = var_3095_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3095_cast_fp16")];
+            tensor<int32, [4]> var_3099_begin_0 = const()[name = tensor<string, []>("op_3099_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_3099_end_0 = const()[name = tensor<string, []>("op_3099_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_3099_end_mask_0 = const()[name = tensor<string, []>("op_3099_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3099_cast_fp16 = slice_by_index(begin = var_3099_begin_0, end = var_3099_end_0, end_mask = var_3099_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3099_cast_fp16")];
+            tensor<int32, [4]> var_3103_begin_0 = const()[name = tensor<string, []>("op_3103_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_3103_end_0 = const()[name = tensor<string, []>("op_3103_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_3103_end_mask_0 = const()[name = tensor<string, []>("op_3103_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3103_cast_fp16 = slice_by_index(begin = var_3103_begin_0, end = var_3103_end_0, end_mask = var_3103_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3103_cast_fp16")];
+            tensor<int32, [4]> var_3107_begin_0 = const()[name = tensor<string, []>("op_3107_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_3107_end_0 = const()[name = tensor<string, []>("op_3107_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_3107_end_mask_0 = const()[name = tensor<string, []>("op_3107_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3107_cast_fp16 = slice_by_index(begin = var_3107_begin_0, end = var_3107_end_0, end_mask = var_3107_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3107_cast_fp16")];
+            tensor<int32, [4]> var_3111_begin_0 = const()[name = tensor<string, []>("op_3111_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_3111_end_0 = const()[name = tensor<string, []>("op_3111_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_3111_end_mask_0 = const()[name = tensor<string, []>("op_3111_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3111_cast_fp16 = slice_by_index(begin = var_3111_begin_0, end = var_3111_end_0, end_mask = var_3111_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3111_cast_fp16")];
+            tensor<int32, [4]> var_3115_begin_0 = const()[name = tensor<string, []>("op_3115_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_3115_end_0 = const()[name = tensor<string, []>("op_3115_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_3115_end_mask_0 = const()[name = tensor<string, []>("op_3115_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3115_cast_fp16 = slice_by_index(begin = var_3115_begin_0, end = var_3115_end_0, end_mask = var_3115_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3115_cast_fp16")];
+            tensor<int32, [4]> var_3119_begin_0 = const()[name = tensor<string, []>("op_3119_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_3119_end_0 = const()[name = tensor<string, []>("op_3119_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_3119_end_mask_0 = const()[name = tensor<string, []>("op_3119_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3119_cast_fp16 = slice_by_index(begin = var_3119_begin_0, end = var_3119_end_0, end_mask = var_3119_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3119_cast_fp16")];
+            tensor<int32, [4]> var_3123_begin_0 = const()[name = tensor<string, []>("op_3123_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_3123_end_0 = const()[name = tensor<string, []>("op_3123_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_3123_end_mask_0 = const()[name = tensor<string, []>("op_3123_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3123_cast_fp16 = slice_by_index(begin = var_3123_begin_0, end = var_3123_end_0, end_mask = var_3123_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3123_cast_fp16")];
+            tensor<int32, [4]> var_3127_begin_0 = const()[name = tensor<string, []>("op_3127_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_3127_end_0 = const()[name = tensor<string, []>("op_3127_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_3127_end_mask_0 = const()[name = tensor<string, []>("op_3127_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3127_cast_fp16 = slice_by_index(begin = var_3127_begin_0, end = var_3127_end_0, end_mask = var_3127_end_mask_0, x = query_7_cast_fp16)[name = tensor<string, []>("op_3127_cast_fp16")];
+            tensor<int32, [4]> var_3136_begin_0 = const()[name = tensor<string, []>("op_3136_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3136_end_0 = const()[name = tensor<string, []>("op_3136_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3136_end_mask_0 = const()[name = tensor<string, []>("op_3136_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3136_cast_fp16 = slice_by_index(begin = var_3136_begin_0, end = var_3136_end_0, end_mask = var_3136_end_mask_0, x = var_3083_cast_fp16)[name = tensor<string, []>("op_3136_cast_fp16")];
+            tensor<int32, [4]> var_3143_begin_0 = const()[name = tensor<string, []>("op_3143_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3143_end_0 = const()[name = tensor<string, []>("op_3143_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3143_end_mask_0 = const()[name = tensor<string, []>("op_3143_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3143_cast_fp16 = slice_by_index(begin = var_3143_begin_0, end = var_3143_end_0, end_mask = var_3143_end_mask_0, x = var_3083_cast_fp16)[name = tensor<string, []>("op_3143_cast_fp16")];
+            tensor<int32, [4]> var_3150_begin_0 = const()[name = tensor<string, []>("op_3150_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3150_end_0 = const()[name = tensor<string, []>("op_3150_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3150_end_mask_0 = const()[name = tensor<string, []>("op_3150_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3150_cast_fp16 = slice_by_index(begin = var_3150_begin_0, end = var_3150_end_0, end_mask = var_3150_end_mask_0, x = var_3083_cast_fp16)[name = tensor<string, []>("op_3150_cast_fp16")];
+            tensor<int32, [4]> var_3157_begin_0 = const()[name = tensor<string, []>("op_3157_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3157_end_0 = const()[name = tensor<string, []>("op_3157_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3157_end_mask_0 = const()[name = tensor<string, []>("op_3157_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3157_cast_fp16 = slice_by_index(begin = var_3157_begin_0, end = var_3157_end_0, end_mask = var_3157_end_mask_0, x = var_3083_cast_fp16)[name = tensor<string, []>("op_3157_cast_fp16")];
+            tensor<int32, [4]> var_3164_begin_0 = const()[name = tensor<string, []>("op_3164_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3164_end_0 = const()[name = tensor<string, []>("op_3164_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3164_end_mask_0 = const()[name = tensor<string, []>("op_3164_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3164_cast_fp16 = slice_by_index(begin = var_3164_begin_0, end = var_3164_end_0, end_mask = var_3164_end_mask_0, x = var_3087_cast_fp16)[name = tensor<string, []>("op_3164_cast_fp16")];
+            tensor<int32, [4]> var_3171_begin_0 = const()[name = tensor<string, []>("op_3171_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3171_end_0 = const()[name = tensor<string, []>("op_3171_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3171_end_mask_0 = const()[name = tensor<string, []>("op_3171_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3171_cast_fp16 = slice_by_index(begin = var_3171_begin_0, end = var_3171_end_0, end_mask = var_3171_end_mask_0, x = var_3087_cast_fp16)[name = tensor<string, []>("op_3171_cast_fp16")];
+            tensor<int32, [4]> var_3178_begin_0 = const()[name = tensor<string, []>("op_3178_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3178_end_0 = const()[name = tensor<string, []>("op_3178_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3178_end_mask_0 = const()[name = tensor<string, []>("op_3178_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3178_cast_fp16 = slice_by_index(begin = var_3178_begin_0, end = var_3178_end_0, end_mask = var_3178_end_mask_0, x = var_3087_cast_fp16)[name = tensor<string, []>("op_3178_cast_fp16")];
+            tensor<int32, [4]> var_3185_begin_0 = const()[name = tensor<string, []>("op_3185_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3185_end_0 = const()[name = tensor<string, []>("op_3185_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3185_end_mask_0 = const()[name = tensor<string, []>("op_3185_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3185_cast_fp16 = slice_by_index(begin = var_3185_begin_0, end = var_3185_end_0, end_mask = var_3185_end_mask_0, x = var_3087_cast_fp16)[name = tensor<string, []>("op_3185_cast_fp16")];
+            tensor<int32, [4]> var_3192_begin_0 = const()[name = tensor<string, []>("op_3192_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3192_end_0 = const()[name = tensor<string, []>("op_3192_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3192_end_mask_0 = const()[name = tensor<string, []>("op_3192_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3192_cast_fp16 = slice_by_index(begin = var_3192_begin_0, end = var_3192_end_0, end_mask = var_3192_end_mask_0, x = var_3091_cast_fp16)[name = tensor<string, []>("op_3192_cast_fp16")];
+            tensor<int32, [4]> var_3199_begin_0 = const()[name = tensor<string, []>("op_3199_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3199_end_0 = const()[name = tensor<string, []>("op_3199_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3199_end_mask_0 = const()[name = tensor<string, []>("op_3199_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3199_cast_fp16 = slice_by_index(begin = var_3199_begin_0, end = var_3199_end_0, end_mask = var_3199_end_mask_0, x = var_3091_cast_fp16)[name = tensor<string, []>("op_3199_cast_fp16")];
+            tensor<int32, [4]> var_3206_begin_0 = const()[name = tensor<string, []>("op_3206_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3206_end_0 = const()[name = tensor<string, []>("op_3206_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3206_end_mask_0 = const()[name = tensor<string, []>("op_3206_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3206_cast_fp16 = slice_by_index(begin = var_3206_begin_0, end = var_3206_end_0, end_mask = var_3206_end_mask_0, x = var_3091_cast_fp16)[name = tensor<string, []>("op_3206_cast_fp16")];
+            tensor<int32, [4]> var_3213_begin_0 = const()[name = tensor<string, []>("op_3213_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3213_end_0 = const()[name = tensor<string, []>("op_3213_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3213_end_mask_0 = const()[name = tensor<string, []>("op_3213_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3213_cast_fp16 = slice_by_index(begin = var_3213_begin_0, end = var_3213_end_0, end_mask = var_3213_end_mask_0, x = var_3091_cast_fp16)[name = tensor<string, []>("op_3213_cast_fp16")];
+            tensor<int32, [4]> var_3220_begin_0 = const()[name = tensor<string, []>("op_3220_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3220_end_0 = const()[name = tensor<string, []>("op_3220_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3220_end_mask_0 = const()[name = tensor<string, []>("op_3220_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3220_cast_fp16 = slice_by_index(begin = var_3220_begin_0, end = var_3220_end_0, end_mask = var_3220_end_mask_0, x = var_3095_cast_fp16)[name = tensor<string, []>("op_3220_cast_fp16")];
+            tensor<int32, [4]> var_3227_begin_0 = const()[name = tensor<string, []>("op_3227_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3227_end_0 = const()[name = tensor<string, []>("op_3227_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3227_end_mask_0 = const()[name = tensor<string, []>("op_3227_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3227_cast_fp16 = slice_by_index(begin = var_3227_begin_0, end = var_3227_end_0, end_mask = var_3227_end_mask_0, x = var_3095_cast_fp16)[name = tensor<string, []>("op_3227_cast_fp16")];
+            tensor<int32, [4]> var_3234_begin_0 = const()[name = tensor<string, []>("op_3234_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3234_end_0 = const()[name = tensor<string, []>("op_3234_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3234_end_mask_0 = const()[name = tensor<string, []>("op_3234_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3234_cast_fp16 = slice_by_index(begin = var_3234_begin_0, end = var_3234_end_0, end_mask = var_3234_end_mask_0, x = var_3095_cast_fp16)[name = tensor<string, []>("op_3234_cast_fp16")];
+            tensor<int32, [4]> var_3241_begin_0 = const()[name = tensor<string, []>("op_3241_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3241_end_0 = const()[name = tensor<string, []>("op_3241_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3241_end_mask_0 = const()[name = tensor<string, []>("op_3241_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3241_cast_fp16 = slice_by_index(begin = var_3241_begin_0, end = var_3241_end_0, end_mask = var_3241_end_mask_0, x = var_3095_cast_fp16)[name = tensor<string, []>("op_3241_cast_fp16")];
+            tensor<int32, [4]> var_3248_begin_0 = const()[name = tensor<string, []>("op_3248_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3248_end_0 = const()[name = tensor<string, []>("op_3248_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3248_end_mask_0 = const()[name = tensor<string, []>("op_3248_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3248_cast_fp16 = slice_by_index(begin = var_3248_begin_0, end = var_3248_end_0, end_mask = var_3248_end_mask_0, x = var_3099_cast_fp16)[name = tensor<string, []>("op_3248_cast_fp16")];
+            tensor<int32, [4]> var_3255_begin_0 = const()[name = tensor<string, []>("op_3255_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3255_end_0 = const()[name = tensor<string, []>("op_3255_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3255_end_mask_0 = const()[name = tensor<string, []>("op_3255_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3255_cast_fp16 = slice_by_index(begin = var_3255_begin_0, end = var_3255_end_0, end_mask = var_3255_end_mask_0, x = var_3099_cast_fp16)[name = tensor<string, []>("op_3255_cast_fp16")];
+            tensor<int32, [4]> var_3262_begin_0 = const()[name = tensor<string, []>("op_3262_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3262_end_0 = const()[name = tensor<string, []>("op_3262_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3262_end_mask_0 = const()[name = tensor<string, []>("op_3262_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3262_cast_fp16 = slice_by_index(begin = var_3262_begin_0, end = var_3262_end_0, end_mask = var_3262_end_mask_0, x = var_3099_cast_fp16)[name = tensor<string, []>("op_3262_cast_fp16")];
+            tensor<int32, [4]> var_3269_begin_0 = const()[name = tensor<string, []>("op_3269_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3269_end_0 = const()[name = tensor<string, []>("op_3269_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3269_end_mask_0 = const()[name = tensor<string, []>("op_3269_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3269_cast_fp16 = slice_by_index(begin = var_3269_begin_0, end = var_3269_end_0, end_mask = var_3269_end_mask_0, x = var_3099_cast_fp16)[name = tensor<string, []>("op_3269_cast_fp16")];
+            tensor<int32, [4]> var_3276_begin_0 = const()[name = tensor<string, []>("op_3276_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3276_end_0 = const()[name = tensor<string, []>("op_3276_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3276_end_mask_0 = const()[name = tensor<string, []>("op_3276_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3276_cast_fp16 = slice_by_index(begin = var_3276_begin_0, end = var_3276_end_0, end_mask = var_3276_end_mask_0, x = var_3103_cast_fp16)[name = tensor<string, []>("op_3276_cast_fp16")];
+            tensor<int32, [4]> var_3283_begin_0 = const()[name = tensor<string, []>("op_3283_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3283_end_0 = const()[name = tensor<string, []>("op_3283_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3283_end_mask_0 = const()[name = tensor<string, []>("op_3283_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3283_cast_fp16 = slice_by_index(begin = var_3283_begin_0, end = var_3283_end_0, end_mask = var_3283_end_mask_0, x = var_3103_cast_fp16)[name = tensor<string, []>("op_3283_cast_fp16")];
+            tensor<int32, [4]> var_3290_begin_0 = const()[name = tensor<string, []>("op_3290_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3290_end_0 = const()[name = tensor<string, []>("op_3290_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3290_end_mask_0 = const()[name = tensor<string, []>("op_3290_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3290_cast_fp16 = slice_by_index(begin = var_3290_begin_0, end = var_3290_end_0, end_mask = var_3290_end_mask_0, x = var_3103_cast_fp16)[name = tensor<string, []>("op_3290_cast_fp16")];
+            tensor<int32, [4]> var_3297_begin_0 = const()[name = tensor<string, []>("op_3297_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3297_end_0 = const()[name = tensor<string, []>("op_3297_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3297_end_mask_0 = const()[name = tensor<string, []>("op_3297_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3297_cast_fp16 = slice_by_index(begin = var_3297_begin_0, end = var_3297_end_0, end_mask = var_3297_end_mask_0, x = var_3103_cast_fp16)[name = tensor<string, []>("op_3297_cast_fp16")];
+            tensor<int32, [4]> var_3304_begin_0 = const()[name = tensor<string, []>("op_3304_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3304_end_0 = const()[name = tensor<string, []>("op_3304_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3304_end_mask_0 = const()[name = tensor<string, []>("op_3304_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3304_cast_fp16 = slice_by_index(begin = var_3304_begin_0, end = var_3304_end_0, end_mask = var_3304_end_mask_0, x = var_3107_cast_fp16)[name = tensor<string, []>("op_3304_cast_fp16")];
+            tensor<int32, [4]> var_3311_begin_0 = const()[name = tensor<string, []>("op_3311_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3311_end_0 = const()[name = tensor<string, []>("op_3311_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3311_end_mask_0 = const()[name = tensor<string, []>("op_3311_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3311_cast_fp16 = slice_by_index(begin = var_3311_begin_0, end = var_3311_end_0, end_mask = var_3311_end_mask_0, x = var_3107_cast_fp16)[name = tensor<string, []>("op_3311_cast_fp16")];
+            tensor<int32, [4]> var_3318_begin_0 = const()[name = tensor<string, []>("op_3318_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3318_end_0 = const()[name = tensor<string, []>("op_3318_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3318_end_mask_0 = const()[name = tensor<string, []>("op_3318_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3318_cast_fp16 = slice_by_index(begin = var_3318_begin_0, end = var_3318_end_0, end_mask = var_3318_end_mask_0, x = var_3107_cast_fp16)[name = tensor<string, []>("op_3318_cast_fp16")];
+            tensor<int32, [4]> var_3325_begin_0 = const()[name = tensor<string, []>("op_3325_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3325_end_0 = const()[name = tensor<string, []>("op_3325_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3325_end_mask_0 = const()[name = tensor<string, []>("op_3325_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3325_cast_fp16 = slice_by_index(begin = var_3325_begin_0, end = var_3325_end_0, end_mask = var_3325_end_mask_0, x = var_3107_cast_fp16)[name = tensor<string, []>("op_3325_cast_fp16")];
+            tensor<int32, [4]> var_3332_begin_0 = const()[name = tensor<string, []>("op_3332_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3332_end_0 = const()[name = tensor<string, []>("op_3332_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3332_end_mask_0 = const()[name = tensor<string, []>("op_3332_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3332_cast_fp16 = slice_by_index(begin = var_3332_begin_0, end = var_3332_end_0, end_mask = var_3332_end_mask_0, x = var_3111_cast_fp16)[name = tensor<string, []>("op_3332_cast_fp16")];
+            tensor<int32, [4]> var_3339_begin_0 = const()[name = tensor<string, []>("op_3339_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3339_end_0 = const()[name = tensor<string, []>("op_3339_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3339_end_mask_0 = const()[name = tensor<string, []>("op_3339_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3339_cast_fp16 = slice_by_index(begin = var_3339_begin_0, end = var_3339_end_0, end_mask = var_3339_end_mask_0, x = var_3111_cast_fp16)[name = tensor<string, []>("op_3339_cast_fp16")];
+            tensor<int32, [4]> var_3346_begin_0 = const()[name = tensor<string, []>("op_3346_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3346_end_0 = const()[name = tensor<string, []>("op_3346_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3346_end_mask_0 = const()[name = tensor<string, []>("op_3346_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3346_cast_fp16 = slice_by_index(begin = var_3346_begin_0, end = var_3346_end_0, end_mask = var_3346_end_mask_0, x = var_3111_cast_fp16)[name = tensor<string, []>("op_3346_cast_fp16")];
+            tensor<int32, [4]> var_3353_begin_0 = const()[name = tensor<string, []>("op_3353_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3353_end_0 = const()[name = tensor<string, []>("op_3353_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3353_end_mask_0 = const()[name = tensor<string, []>("op_3353_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3353_cast_fp16 = slice_by_index(begin = var_3353_begin_0, end = var_3353_end_0, end_mask = var_3353_end_mask_0, x = var_3111_cast_fp16)[name = tensor<string, []>("op_3353_cast_fp16")];
+            tensor<int32, [4]> var_3360_begin_0 = const()[name = tensor<string, []>("op_3360_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3360_end_0 = const()[name = tensor<string, []>("op_3360_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3360_end_mask_0 = const()[name = tensor<string, []>("op_3360_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3360_cast_fp16 = slice_by_index(begin = var_3360_begin_0, end = var_3360_end_0, end_mask = var_3360_end_mask_0, x = var_3115_cast_fp16)[name = tensor<string, []>("op_3360_cast_fp16")];
+            tensor<int32, [4]> var_3367_begin_0 = const()[name = tensor<string, []>("op_3367_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3367_end_0 = const()[name = tensor<string, []>("op_3367_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3367_end_mask_0 = const()[name = tensor<string, []>("op_3367_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3367_cast_fp16 = slice_by_index(begin = var_3367_begin_0, end = var_3367_end_0, end_mask = var_3367_end_mask_0, x = var_3115_cast_fp16)[name = tensor<string, []>("op_3367_cast_fp16")];
+            tensor<int32, [4]> var_3374_begin_0 = const()[name = tensor<string, []>("op_3374_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3374_end_0 = const()[name = tensor<string, []>("op_3374_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3374_end_mask_0 = const()[name = tensor<string, []>("op_3374_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3374_cast_fp16 = slice_by_index(begin = var_3374_begin_0, end = var_3374_end_0, end_mask = var_3374_end_mask_0, x = var_3115_cast_fp16)[name = tensor<string, []>("op_3374_cast_fp16")];
+            tensor<int32, [4]> var_3381_begin_0 = const()[name = tensor<string, []>("op_3381_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3381_end_0 = const()[name = tensor<string, []>("op_3381_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3381_end_mask_0 = const()[name = tensor<string, []>("op_3381_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3381_cast_fp16 = slice_by_index(begin = var_3381_begin_0, end = var_3381_end_0, end_mask = var_3381_end_mask_0, x = var_3115_cast_fp16)[name = tensor<string, []>("op_3381_cast_fp16")];
+            tensor<int32, [4]> var_3388_begin_0 = const()[name = tensor<string, []>("op_3388_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3388_end_0 = const()[name = tensor<string, []>("op_3388_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3388_end_mask_0 = const()[name = tensor<string, []>("op_3388_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3388_cast_fp16 = slice_by_index(begin = var_3388_begin_0, end = var_3388_end_0, end_mask = var_3388_end_mask_0, x = var_3119_cast_fp16)[name = tensor<string, []>("op_3388_cast_fp16")];
+            tensor<int32, [4]> var_3395_begin_0 = const()[name = tensor<string, []>("op_3395_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3395_end_0 = const()[name = tensor<string, []>("op_3395_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3395_end_mask_0 = const()[name = tensor<string, []>("op_3395_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3395_cast_fp16 = slice_by_index(begin = var_3395_begin_0, end = var_3395_end_0, end_mask = var_3395_end_mask_0, x = var_3119_cast_fp16)[name = tensor<string, []>("op_3395_cast_fp16")];
+            tensor<int32, [4]> var_3402_begin_0 = const()[name = tensor<string, []>("op_3402_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3402_end_0 = const()[name = tensor<string, []>("op_3402_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3402_end_mask_0 = const()[name = tensor<string, []>("op_3402_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3402_cast_fp16 = slice_by_index(begin = var_3402_begin_0, end = var_3402_end_0, end_mask = var_3402_end_mask_0, x = var_3119_cast_fp16)[name = tensor<string, []>("op_3402_cast_fp16")];
+            tensor<int32, [4]> var_3409_begin_0 = const()[name = tensor<string, []>("op_3409_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3409_end_0 = const()[name = tensor<string, []>("op_3409_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3409_end_mask_0 = const()[name = tensor<string, []>("op_3409_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3409_cast_fp16 = slice_by_index(begin = var_3409_begin_0, end = var_3409_end_0, end_mask = var_3409_end_mask_0, x = var_3119_cast_fp16)[name = tensor<string, []>("op_3409_cast_fp16")];
+            tensor<int32, [4]> var_3416_begin_0 = const()[name = tensor<string, []>("op_3416_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3416_end_0 = const()[name = tensor<string, []>("op_3416_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3416_end_mask_0 = const()[name = tensor<string, []>("op_3416_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3416_cast_fp16 = slice_by_index(begin = var_3416_begin_0, end = var_3416_end_0, end_mask = var_3416_end_mask_0, x = var_3123_cast_fp16)[name = tensor<string, []>("op_3416_cast_fp16")];
+            tensor<int32, [4]> var_3423_begin_0 = const()[name = tensor<string, []>("op_3423_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3423_end_0 = const()[name = tensor<string, []>("op_3423_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3423_end_mask_0 = const()[name = tensor<string, []>("op_3423_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3423_cast_fp16 = slice_by_index(begin = var_3423_begin_0, end = var_3423_end_0, end_mask = var_3423_end_mask_0, x = var_3123_cast_fp16)[name = tensor<string, []>("op_3423_cast_fp16")];
+            tensor<int32, [4]> var_3430_begin_0 = const()[name = tensor<string, []>("op_3430_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3430_end_0 = const()[name = tensor<string, []>("op_3430_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3430_end_mask_0 = const()[name = tensor<string, []>("op_3430_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3430_cast_fp16 = slice_by_index(begin = var_3430_begin_0, end = var_3430_end_0, end_mask = var_3430_end_mask_0, x = var_3123_cast_fp16)[name = tensor<string, []>("op_3430_cast_fp16")];
+            tensor<int32, [4]> var_3437_begin_0 = const()[name = tensor<string, []>("op_3437_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3437_end_0 = const()[name = tensor<string, []>("op_3437_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3437_end_mask_0 = const()[name = tensor<string, []>("op_3437_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3437_cast_fp16 = slice_by_index(begin = var_3437_begin_0, end = var_3437_end_0, end_mask = var_3437_end_mask_0, x = var_3123_cast_fp16)[name = tensor<string, []>("op_3437_cast_fp16")];
+            tensor<int32, [4]> var_3444_begin_0 = const()[name = tensor<string, []>("op_3444_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3444_end_0 = const()[name = tensor<string, []>("op_3444_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_3444_end_mask_0 = const()[name = tensor<string, []>("op_3444_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3444_cast_fp16 = slice_by_index(begin = var_3444_begin_0, end = var_3444_end_0, end_mask = var_3444_end_mask_0, x = var_3127_cast_fp16)[name = tensor<string, []>("op_3444_cast_fp16")];
+            tensor<int32, [4]> var_3451_begin_0 = const()[name = tensor<string, []>("op_3451_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_3451_end_0 = const()[name = tensor<string, []>("op_3451_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_3451_end_mask_0 = const()[name = tensor<string, []>("op_3451_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3451_cast_fp16 = slice_by_index(begin = var_3451_begin_0, end = var_3451_end_0, end_mask = var_3451_end_mask_0, x = var_3127_cast_fp16)[name = tensor<string, []>("op_3451_cast_fp16")];
+            tensor<int32, [4]> var_3458_begin_0 = const()[name = tensor<string, []>("op_3458_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_3458_end_0 = const()[name = tensor<string, []>("op_3458_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_3458_end_mask_0 = const()[name = tensor<string, []>("op_3458_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3458_cast_fp16 = slice_by_index(begin = var_3458_begin_0, end = var_3458_end_0, end_mask = var_3458_end_mask_0, x = var_3127_cast_fp16)[name = tensor<string, []>("op_3458_cast_fp16")];
+            tensor<int32, [4]> var_3465_begin_0 = const()[name = tensor<string, []>("op_3465_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_3465_end_0 = const()[name = tensor<string, []>("op_3465_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3465_end_mask_0 = const()[name = tensor<string, []>("op_3465_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_3465_cast_fp16 = slice_by_index(begin = var_3465_begin_0, end = var_3465_end_0, end_mask = var_3465_end_mask_0, x = var_3127_cast_fp16)[name = tensor<string, []>("op_3465_cast_fp16")];
+            tensor<int32, [4]> k_7_perm_0 = const()[name = tensor<string, []>("k_7_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_3470_begin_0 = const()[name = tensor<string, []>("op_3470_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3470_end_0 = const()[name = tensor<string, []>("op_3470_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_3470_end_mask_0 = const()[name = tensor<string, []>("op_3470_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_8 = transpose(perm = k_7_perm_0, x = key_7_cast_fp16)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3470_cast_fp16 = slice_by_index(begin = var_3470_begin_0, end = var_3470_end_0, end_mask = var_3470_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3470_cast_fp16")];
+            tensor<int32, [4]> var_3474_begin_0 = const()[name = tensor<string, []>("op_3474_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_3474_end_0 = const()[name = tensor<string, []>("op_3474_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_3474_end_mask_0 = const()[name = tensor<string, []>("op_3474_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3474_cast_fp16 = slice_by_index(begin = var_3474_begin_0, end = var_3474_end_0, end_mask = var_3474_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3474_cast_fp16")];
+            tensor<int32, [4]> var_3478_begin_0 = const()[name = tensor<string, []>("op_3478_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_3478_end_0 = const()[name = tensor<string, []>("op_3478_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_3478_end_mask_0 = const()[name = tensor<string, []>("op_3478_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3478_cast_fp16 = slice_by_index(begin = var_3478_begin_0, end = var_3478_end_0, end_mask = var_3478_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3478_cast_fp16")];
+            tensor<int32, [4]> var_3482_begin_0 = const()[name = tensor<string, []>("op_3482_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_3482_end_0 = const()[name = tensor<string, []>("op_3482_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_3482_end_mask_0 = const()[name = tensor<string, []>("op_3482_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3482_cast_fp16 = slice_by_index(begin = var_3482_begin_0, end = var_3482_end_0, end_mask = var_3482_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3482_cast_fp16")];
+            tensor<int32, [4]> var_3486_begin_0 = const()[name = tensor<string, []>("op_3486_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_3486_end_0 = const()[name = tensor<string, []>("op_3486_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_3486_end_mask_0 = const()[name = tensor<string, []>("op_3486_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3486_cast_fp16 = slice_by_index(begin = var_3486_begin_0, end = var_3486_end_0, end_mask = var_3486_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3486_cast_fp16")];
+            tensor<int32, [4]> var_3490_begin_0 = const()[name = tensor<string, []>("op_3490_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_3490_end_0 = const()[name = tensor<string, []>("op_3490_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_3490_end_mask_0 = const()[name = tensor<string, []>("op_3490_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3490_cast_fp16 = slice_by_index(begin = var_3490_begin_0, end = var_3490_end_0, end_mask = var_3490_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3490_cast_fp16")];
+            tensor<int32, [4]> var_3494_begin_0 = const()[name = tensor<string, []>("op_3494_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_3494_end_0 = const()[name = tensor<string, []>("op_3494_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_3494_end_mask_0 = const()[name = tensor<string, []>("op_3494_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3494_cast_fp16 = slice_by_index(begin = var_3494_begin_0, end = var_3494_end_0, end_mask = var_3494_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3494_cast_fp16")];
+            tensor<int32, [4]> var_3498_begin_0 = const()[name = tensor<string, []>("op_3498_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_3498_end_0 = const()[name = tensor<string, []>("op_3498_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_3498_end_mask_0 = const()[name = tensor<string, []>("op_3498_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3498_cast_fp16 = slice_by_index(begin = var_3498_begin_0, end = var_3498_end_0, end_mask = var_3498_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3498_cast_fp16")];
+            tensor<int32, [4]> var_3502_begin_0 = const()[name = tensor<string, []>("op_3502_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_3502_end_0 = const()[name = tensor<string, []>("op_3502_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_3502_end_mask_0 = const()[name = tensor<string, []>("op_3502_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3502_cast_fp16 = slice_by_index(begin = var_3502_begin_0, end = var_3502_end_0, end_mask = var_3502_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3502_cast_fp16")];
+            tensor<int32, [4]> var_3506_begin_0 = const()[name = tensor<string, []>("op_3506_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_3506_end_0 = const()[name = tensor<string, []>("op_3506_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_3506_end_mask_0 = const()[name = tensor<string, []>("op_3506_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3506_cast_fp16 = slice_by_index(begin = var_3506_begin_0, end = var_3506_end_0, end_mask = var_3506_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3506_cast_fp16")];
+            tensor<int32, [4]> var_3510_begin_0 = const()[name = tensor<string, []>("op_3510_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_3510_end_0 = const()[name = tensor<string, []>("op_3510_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_3510_end_mask_0 = const()[name = tensor<string, []>("op_3510_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3510_cast_fp16 = slice_by_index(begin = var_3510_begin_0, end = var_3510_end_0, end_mask = var_3510_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3510_cast_fp16")];
+            tensor<int32, [4]> var_3514_begin_0 = const()[name = tensor<string, []>("op_3514_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_3514_end_0 = const()[name = tensor<string, []>("op_3514_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_3514_end_mask_0 = const()[name = tensor<string, []>("op_3514_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_3514_cast_fp16 = slice_by_index(begin = var_3514_begin_0, end = var_3514_end_0, end_mask = var_3514_end_mask_0, x = transpose_8)[name = tensor<string, []>("op_3514_cast_fp16")];
+            tensor<int32, [4]> var_3516_begin_0 = const()[name = tensor<string, []>("op_3516_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3516_end_0 = const()[name = tensor<string, []>("op_3516_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_3516_end_mask_0 = const()[name = tensor<string, []>("op_3516_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3516_cast_fp16 = slice_by_index(begin = var_3516_begin_0, end = var_3516_end_0, end_mask = var_3516_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3516_cast_fp16")];
+            tensor<int32, [4]> var_3520_begin_0 = const()[name = tensor<string, []>("op_3520_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_3520_end_0 = const()[name = tensor<string, []>("op_3520_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_3520_end_mask_0 = const()[name = tensor<string, []>("op_3520_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3520_cast_fp16 = slice_by_index(begin = var_3520_begin_0, end = var_3520_end_0, end_mask = var_3520_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3520_cast_fp16")];
+            tensor<int32, [4]> var_3524_begin_0 = const()[name = tensor<string, []>("op_3524_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_3524_end_0 = const()[name = tensor<string, []>("op_3524_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_3524_end_mask_0 = const()[name = tensor<string, []>("op_3524_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3524_cast_fp16 = slice_by_index(begin = var_3524_begin_0, end = var_3524_end_0, end_mask = var_3524_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3524_cast_fp16")];
+            tensor<int32, [4]> var_3528_begin_0 = const()[name = tensor<string, []>("op_3528_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_3528_end_0 = const()[name = tensor<string, []>("op_3528_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_3528_end_mask_0 = const()[name = tensor<string, []>("op_3528_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3528_cast_fp16 = slice_by_index(begin = var_3528_begin_0, end = var_3528_end_0, end_mask = var_3528_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3528_cast_fp16")];
+            tensor<int32, [4]> var_3532_begin_0 = const()[name = tensor<string, []>("op_3532_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_3532_end_0 = const()[name = tensor<string, []>("op_3532_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_3532_end_mask_0 = const()[name = tensor<string, []>("op_3532_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3532_cast_fp16 = slice_by_index(begin = var_3532_begin_0, end = var_3532_end_0, end_mask = var_3532_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3532_cast_fp16")];
+            tensor<int32, [4]> var_3536_begin_0 = const()[name = tensor<string, []>("op_3536_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_3536_end_0 = const()[name = tensor<string, []>("op_3536_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_3536_end_mask_0 = const()[name = tensor<string, []>("op_3536_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3536_cast_fp16 = slice_by_index(begin = var_3536_begin_0, end = var_3536_end_0, end_mask = var_3536_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3536_cast_fp16")];
+            tensor<int32, [4]> var_3540_begin_0 = const()[name = tensor<string, []>("op_3540_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_3540_end_0 = const()[name = tensor<string, []>("op_3540_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_3540_end_mask_0 = const()[name = tensor<string, []>("op_3540_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3540_cast_fp16 = slice_by_index(begin = var_3540_begin_0, end = var_3540_end_0, end_mask = var_3540_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3540_cast_fp16")];
+            tensor<int32, [4]> var_3544_begin_0 = const()[name = tensor<string, []>("op_3544_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_3544_end_0 = const()[name = tensor<string, []>("op_3544_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_3544_end_mask_0 = const()[name = tensor<string, []>("op_3544_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3544_cast_fp16 = slice_by_index(begin = var_3544_begin_0, end = var_3544_end_0, end_mask = var_3544_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3544_cast_fp16")];
+            tensor<int32, [4]> var_3548_begin_0 = const()[name = tensor<string, []>("op_3548_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_3548_end_0 = const()[name = tensor<string, []>("op_3548_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_3548_end_mask_0 = const()[name = tensor<string, []>("op_3548_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3548_cast_fp16 = slice_by_index(begin = var_3548_begin_0, end = var_3548_end_0, end_mask = var_3548_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3548_cast_fp16")];
+            tensor<int32, [4]> var_3552_begin_0 = const()[name = tensor<string, []>("op_3552_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_3552_end_0 = const()[name = tensor<string, []>("op_3552_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_3552_end_mask_0 = const()[name = tensor<string, []>("op_3552_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3552_cast_fp16 = slice_by_index(begin = var_3552_begin_0, end = var_3552_end_0, end_mask = var_3552_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3552_cast_fp16")];
+            tensor<int32, [4]> var_3556_begin_0 = const()[name = tensor<string, []>("op_3556_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_3556_end_0 = const()[name = tensor<string, []>("op_3556_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_3556_end_mask_0 = const()[name = tensor<string, []>("op_3556_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3556_cast_fp16 = slice_by_index(begin = var_3556_begin_0, end = var_3556_end_0, end_mask = var_3556_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3556_cast_fp16")];
+            tensor<int32, [4]> var_3560_begin_0 = const()[name = tensor<string, []>("op_3560_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_3560_end_0 = const()[name = tensor<string, []>("op_3560_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_3560_end_mask_0 = const()[name = tensor<string, []>("op_3560_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_3560_cast_fp16 = slice_by_index(begin = var_3560_begin_0, end = var_3560_end_0, end_mask = var_3560_end_mask_0, x = value_7_cast_fp16)[name = tensor<string, []>("op_3560_cast_fp16")];
+            tensor<string, []> var_3564_equation_0 = const()[name = tensor<string, []>("op_3564_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3564_cast_fp16 = einsum(equation = var_3564_equation_0, values = (var_3470_cast_fp16, var_3136_cast_fp16))[name = tensor<string, []>("op_3564_cast_fp16")];
+            tensor<fp16, []> var_3565_to_fp16 = const()[name = tensor<string, []>("op_3565_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_289_cast_fp16 = mul(x = var_3564_cast_fp16, y = var_3565_to_fp16)[name = tensor<string, []>("aw_chunk_289_cast_fp16")];
+            tensor<string, []> var_3568_equation_0 = const()[name = tensor<string, []>("op_3568_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3568_cast_fp16 = einsum(equation = var_3568_equation_0, values = (var_3470_cast_fp16, var_3143_cast_fp16))[name = tensor<string, []>("op_3568_cast_fp16")];
+            tensor<fp16, []> var_3569_to_fp16 = const()[name = tensor<string, []>("op_3569_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_291_cast_fp16 = mul(x = var_3568_cast_fp16, y = var_3569_to_fp16)[name = tensor<string, []>("aw_chunk_291_cast_fp16")];
+            tensor<string, []> var_3572_equation_0 = const()[name = tensor<string, []>("op_3572_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3572_cast_fp16 = einsum(equation = var_3572_equation_0, values = (var_3470_cast_fp16, var_3150_cast_fp16))[name = tensor<string, []>("op_3572_cast_fp16")];
+            tensor<fp16, []> var_3573_to_fp16 = const()[name = tensor<string, []>("op_3573_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_293_cast_fp16 = mul(x = var_3572_cast_fp16, y = var_3573_to_fp16)[name = tensor<string, []>("aw_chunk_293_cast_fp16")];
+            tensor<string, []> var_3576_equation_0 = const()[name = tensor<string, []>("op_3576_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3576_cast_fp16 = einsum(equation = var_3576_equation_0, values = (var_3470_cast_fp16, var_3157_cast_fp16))[name = tensor<string, []>("op_3576_cast_fp16")];
+            tensor<fp16, []> var_3577_to_fp16 = const()[name = tensor<string, []>("op_3577_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_295_cast_fp16 = mul(x = var_3576_cast_fp16, y = var_3577_to_fp16)[name = tensor<string, []>("aw_chunk_295_cast_fp16")];
+            tensor<string, []> var_3580_equation_0 = const()[name = tensor<string, []>("op_3580_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3580_cast_fp16 = einsum(equation = var_3580_equation_0, values = (var_3474_cast_fp16, var_3164_cast_fp16))[name = tensor<string, []>("op_3580_cast_fp16")];
+            tensor<fp16, []> var_3581_to_fp16 = const()[name = tensor<string, []>("op_3581_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_297_cast_fp16 = mul(x = var_3580_cast_fp16, y = var_3581_to_fp16)[name = tensor<string, []>("aw_chunk_297_cast_fp16")];
+            tensor<string, []> var_3584_equation_0 = const()[name = tensor<string, []>("op_3584_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3584_cast_fp16 = einsum(equation = var_3584_equation_0, values = (var_3474_cast_fp16, var_3171_cast_fp16))[name = tensor<string, []>("op_3584_cast_fp16")];
+            tensor<fp16, []> var_3585_to_fp16 = const()[name = tensor<string, []>("op_3585_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_299_cast_fp16 = mul(x = var_3584_cast_fp16, y = var_3585_to_fp16)[name = tensor<string, []>("aw_chunk_299_cast_fp16")];
+            tensor<string, []> var_3588_equation_0 = const()[name = tensor<string, []>("op_3588_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3588_cast_fp16 = einsum(equation = var_3588_equation_0, values = (var_3474_cast_fp16, var_3178_cast_fp16))[name = tensor<string, []>("op_3588_cast_fp16")];
+            tensor<fp16, []> var_3589_to_fp16 = const()[name = tensor<string, []>("op_3589_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_301_cast_fp16 = mul(x = var_3588_cast_fp16, y = var_3589_to_fp16)[name = tensor<string, []>("aw_chunk_301_cast_fp16")];
+            tensor<string, []> var_3592_equation_0 = const()[name = tensor<string, []>("op_3592_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3592_cast_fp16 = einsum(equation = var_3592_equation_0, values = (var_3474_cast_fp16, var_3185_cast_fp16))[name = tensor<string, []>("op_3592_cast_fp16")];
+            tensor<fp16, []> var_3593_to_fp16 = const()[name = tensor<string, []>("op_3593_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_303_cast_fp16 = mul(x = var_3592_cast_fp16, y = var_3593_to_fp16)[name = tensor<string, []>("aw_chunk_303_cast_fp16")];
+            tensor<string, []> var_3596_equation_0 = const()[name = tensor<string, []>("op_3596_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3596_cast_fp16 = einsum(equation = var_3596_equation_0, values = (var_3478_cast_fp16, var_3192_cast_fp16))[name = tensor<string, []>("op_3596_cast_fp16")];
+            tensor<fp16, []> var_3597_to_fp16 = const()[name = tensor<string, []>("op_3597_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_305_cast_fp16 = mul(x = var_3596_cast_fp16, y = var_3597_to_fp16)[name = tensor<string, []>("aw_chunk_305_cast_fp16")];
+            tensor<string, []> var_3600_equation_0 = const()[name = tensor<string, []>("op_3600_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3600_cast_fp16 = einsum(equation = var_3600_equation_0, values = (var_3478_cast_fp16, var_3199_cast_fp16))[name = tensor<string, []>("op_3600_cast_fp16")];
+            tensor<fp16, []> var_3601_to_fp16 = const()[name = tensor<string, []>("op_3601_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_307_cast_fp16 = mul(x = var_3600_cast_fp16, y = var_3601_to_fp16)[name = tensor<string, []>("aw_chunk_307_cast_fp16")];
+            tensor<string, []> var_3604_equation_0 = const()[name = tensor<string, []>("op_3604_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3604_cast_fp16 = einsum(equation = var_3604_equation_0, values = (var_3478_cast_fp16, var_3206_cast_fp16))[name = tensor<string, []>("op_3604_cast_fp16")];
+            tensor<fp16, []> var_3605_to_fp16 = const()[name = tensor<string, []>("op_3605_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_309_cast_fp16 = mul(x = var_3604_cast_fp16, y = var_3605_to_fp16)[name = tensor<string, []>("aw_chunk_309_cast_fp16")];
+            tensor<string, []> var_3608_equation_0 = const()[name = tensor<string, []>("op_3608_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3608_cast_fp16 = einsum(equation = var_3608_equation_0, values = (var_3478_cast_fp16, var_3213_cast_fp16))[name = tensor<string, []>("op_3608_cast_fp16")];
+            tensor<fp16, []> var_3609_to_fp16 = const()[name = tensor<string, []>("op_3609_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_311_cast_fp16 = mul(x = var_3608_cast_fp16, y = var_3609_to_fp16)[name = tensor<string, []>("aw_chunk_311_cast_fp16")];
+            tensor<string, []> var_3612_equation_0 = const()[name = tensor<string, []>("op_3612_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3612_cast_fp16 = einsum(equation = var_3612_equation_0, values = (var_3482_cast_fp16, var_3220_cast_fp16))[name = tensor<string, []>("op_3612_cast_fp16")];
+            tensor<fp16, []> var_3613_to_fp16 = const()[name = tensor<string, []>("op_3613_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_313_cast_fp16 = mul(x = var_3612_cast_fp16, y = var_3613_to_fp16)[name = tensor<string, []>("aw_chunk_313_cast_fp16")];
+            tensor<string, []> var_3616_equation_0 = const()[name = tensor<string, []>("op_3616_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3616_cast_fp16 = einsum(equation = var_3616_equation_0, values = (var_3482_cast_fp16, var_3227_cast_fp16))[name = tensor<string, []>("op_3616_cast_fp16")];
+            tensor<fp16, []> var_3617_to_fp16 = const()[name = tensor<string, []>("op_3617_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_315_cast_fp16 = mul(x = var_3616_cast_fp16, y = var_3617_to_fp16)[name = tensor<string, []>("aw_chunk_315_cast_fp16")];
+            tensor<string, []> var_3620_equation_0 = const()[name = tensor<string, []>("op_3620_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3620_cast_fp16 = einsum(equation = var_3620_equation_0, values = (var_3482_cast_fp16, var_3234_cast_fp16))[name = tensor<string, []>("op_3620_cast_fp16")];
+            tensor<fp16, []> var_3621_to_fp16 = const()[name = tensor<string, []>("op_3621_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_317_cast_fp16 = mul(x = var_3620_cast_fp16, y = var_3621_to_fp16)[name = tensor<string, []>("aw_chunk_317_cast_fp16")];
+            tensor<string, []> var_3624_equation_0 = const()[name = tensor<string, []>("op_3624_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3624_cast_fp16 = einsum(equation = var_3624_equation_0, values = (var_3482_cast_fp16, var_3241_cast_fp16))[name = tensor<string, []>("op_3624_cast_fp16")];
+            tensor<fp16, []> var_3625_to_fp16 = const()[name = tensor<string, []>("op_3625_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_319_cast_fp16 = mul(x = var_3624_cast_fp16, y = var_3625_to_fp16)[name = tensor<string, []>("aw_chunk_319_cast_fp16")];
+            tensor<string, []> var_3628_equation_0 = const()[name = tensor<string, []>("op_3628_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3628_cast_fp16 = einsum(equation = var_3628_equation_0, values = (var_3486_cast_fp16, var_3248_cast_fp16))[name = tensor<string, []>("op_3628_cast_fp16")];
+            tensor<fp16, []> var_3629_to_fp16 = const()[name = tensor<string, []>("op_3629_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_321_cast_fp16 = mul(x = var_3628_cast_fp16, y = var_3629_to_fp16)[name = tensor<string, []>("aw_chunk_321_cast_fp16")];
+            tensor<string, []> var_3632_equation_0 = const()[name = tensor<string, []>("op_3632_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3632_cast_fp16 = einsum(equation = var_3632_equation_0, values = (var_3486_cast_fp16, var_3255_cast_fp16))[name = tensor<string, []>("op_3632_cast_fp16")];
+            tensor<fp16, []> var_3633_to_fp16 = const()[name = tensor<string, []>("op_3633_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_323_cast_fp16 = mul(x = var_3632_cast_fp16, y = var_3633_to_fp16)[name = tensor<string, []>("aw_chunk_323_cast_fp16")];
+            tensor<string, []> var_3636_equation_0 = const()[name = tensor<string, []>("op_3636_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3636_cast_fp16 = einsum(equation = var_3636_equation_0, values = (var_3486_cast_fp16, var_3262_cast_fp16))[name = tensor<string, []>("op_3636_cast_fp16")];
+            tensor<fp16, []> var_3637_to_fp16 = const()[name = tensor<string, []>("op_3637_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_325_cast_fp16 = mul(x = var_3636_cast_fp16, y = var_3637_to_fp16)[name = tensor<string, []>("aw_chunk_325_cast_fp16")];
+            tensor<string, []> var_3640_equation_0 = const()[name = tensor<string, []>("op_3640_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3640_cast_fp16 = einsum(equation = var_3640_equation_0, values = (var_3486_cast_fp16, var_3269_cast_fp16))[name = tensor<string, []>("op_3640_cast_fp16")];
+            tensor<fp16, []> var_3641_to_fp16 = const()[name = tensor<string, []>("op_3641_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_327_cast_fp16 = mul(x = var_3640_cast_fp16, y = var_3641_to_fp16)[name = tensor<string, []>("aw_chunk_327_cast_fp16")];
+            tensor<string, []> var_3644_equation_0 = const()[name = tensor<string, []>("op_3644_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3644_cast_fp16 = einsum(equation = var_3644_equation_0, values = (var_3490_cast_fp16, var_3276_cast_fp16))[name = tensor<string, []>("op_3644_cast_fp16")];
+            tensor<fp16, []> var_3645_to_fp16 = const()[name = tensor<string, []>("op_3645_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_329_cast_fp16 = mul(x = var_3644_cast_fp16, y = var_3645_to_fp16)[name = tensor<string, []>("aw_chunk_329_cast_fp16")];
+            tensor<string, []> var_3648_equation_0 = const()[name = tensor<string, []>("op_3648_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3648_cast_fp16 = einsum(equation = var_3648_equation_0, values = (var_3490_cast_fp16, var_3283_cast_fp16))[name = tensor<string, []>("op_3648_cast_fp16")];
+            tensor<fp16, []> var_3649_to_fp16 = const()[name = tensor<string, []>("op_3649_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_331_cast_fp16 = mul(x = var_3648_cast_fp16, y = var_3649_to_fp16)[name = tensor<string, []>("aw_chunk_331_cast_fp16")];
+            tensor<string, []> var_3652_equation_0 = const()[name = tensor<string, []>("op_3652_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3652_cast_fp16 = einsum(equation = var_3652_equation_0, values = (var_3490_cast_fp16, var_3290_cast_fp16))[name = tensor<string, []>("op_3652_cast_fp16")];
+            tensor<fp16, []> var_3653_to_fp16 = const()[name = tensor<string, []>("op_3653_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_333_cast_fp16 = mul(x = var_3652_cast_fp16, y = var_3653_to_fp16)[name = tensor<string, []>("aw_chunk_333_cast_fp16")];
+            tensor<string, []> var_3656_equation_0 = const()[name = tensor<string, []>("op_3656_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3656_cast_fp16 = einsum(equation = var_3656_equation_0, values = (var_3490_cast_fp16, var_3297_cast_fp16))[name = tensor<string, []>("op_3656_cast_fp16")];
+            tensor<fp16, []> var_3657_to_fp16 = const()[name = tensor<string, []>("op_3657_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_335_cast_fp16 = mul(x = var_3656_cast_fp16, y = var_3657_to_fp16)[name = tensor<string, []>("aw_chunk_335_cast_fp16")];
+            tensor<string, []> var_3660_equation_0 = const()[name = tensor<string, []>("op_3660_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3660_cast_fp16 = einsum(equation = var_3660_equation_0, values = (var_3494_cast_fp16, var_3304_cast_fp16))[name = tensor<string, []>("op_3660_cast_fp16")];
+            tensor<fp16, []> var_3661_to_fp16 = const()[name = tensor<string, []>("op_3661_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_337_cast_fp16 = mul(x = var_3660_cast_fp16, y = var_3661_to_fp16)[name = tensor<string, []>("aw_chunk_337_cast_fp16")];
+            tensor<string, []> var_3664_equation_0 = const()[name = tensor<string, []>("op_3664_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3664_cast_fp16 = einsum(equation = var_3664_equation_0, values = (var_3494_cast_fp16, var_3311_cast_fp16))[name = tensor<string, []>("op_3664_cast_fp16")];
+            tensor<fp16, []> var_3665_to_fp16 = const()[name = tensor<string, []>("op_3665_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_339_cast_fp16 = mul(x = var_3664_cast_fp16, y = var_3665_to_fp16)[name = tensor<string, []>("aw_chunk_339_cast_fp16")];
+            tensor<string, []> var_3668_equation_0 = const()[name = tensor<string, []>("op_3668_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3668_cast_fp16 = einsum(equation = var_3668_equation_0, values = (var_3494_cast_fp16, var_3318_cast_fp16))[name = tensor<string, []>("op_3668_cast_fp16")];
+            tensor<fp16, []> var_3669_to_fp16 = const()[name = tensor<string, []>("op_3669_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_341_cast_fp16 = mul(x = var_3668_cast_fp16, y = var_3669_to_fp16)[name = tensor<string, []>("aw_chunk_341_cast_fp16")];
+            tensor<string, []> var_3672_equation_0 = const()[name = tensor<string, []>("op_3672_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3672_cast_fp16 = einsum(equation = var_3672_equation_0, values = (var_3494_cast_fp16, var_3325_cast_fp16))[name = tensor<string, []>("op_3672_cast_fp16")];
+            tensor<fp16, []> var_3673_to_fp16 = const()[name = tensor<string, []>("op_3673_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_343_cast_fp16 = mul(x = var_3672_cast_fp16, y = var_3673_to_fp16)[name = tensor<string, []>("aw_chunk_343_cast_fp16")];
+            tensor<string, []> var_3676_equation_0 = const()[name = tensor<string, []>("op_3676_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3676_cast_fp16 = einsum(equation = var_3676_equation_0, values = (var_3498_cast_fp16, var_3332_cast_fp16))[name = tensor<string, []>("op_3676_cast_fp16")];
+            tensor<fp16, []> var_3677_to_fp16 = const()[name = tensor<string, []>("op_3677_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_345_cast_fp16 = mul(x = var_3676_cast_fp16, y = var_3677_to_fp16)[name = tensor<string, []>("aw_chunk_345_cast_fp16")];
+            tensor<string, []> var_3680_equation_0 = const()[name = tensor<string, []>("op_3680_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3680_cast_fp16 = einsum(equation = var_3680_equation_0, values = (var_3498_cast_fp16, var_3339_cast_fp16))[name = tensor<string, []>("op_3680_cast_fp16")];
+            tensor<fp16, []> var_3681_to_fp16 = const()[name = tensor<string, []>("op_3681_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_347_cast_fp16 = mul(x = var_3680_cast_fp16, y = var_3681_to_fp16)[name = tensor<string, []>("aw_chunk_347_cast_fp16")];
+            tensor<string, []> var_3684_equation_0 = const()[name = tensor<string, []>("op_3684_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3684_cast_fp16 = einsum(equation = var_3684_equation_0, values = (var_3498_cast_fp16, var_3346_cast_fp16))[name = tensor<string, []>("op_3684_cast_fp16")];
+            tensor<fp16, []> var_3685_to_fp16 = const()[name = tensor<string, []>("op_3685_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_349_cast_fp16 = mul(x = var_3684_cast_fp16, y = var_3685_to_fp16)[name = tensor<string, []>("aw_chunk_349_cast_fp16")];
+            tensor<string, []> var_3688_equation_0 = const()[name = tensor<string, []>("op_3688_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3688_cast_fp16 = einsum(equation = var_3688_equation_0, values = (var_3498_cast_fp16, var_3353_cast_fp16))[name = tensor<string, []>("op_3688_cast_fp16")];
+            tensor<fp16, []> var_3689_to_fp16 = const()[name = tensor<string, []>("op_3689_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_351_cast_fp16 = mul(x = var_3688_cast_fp16, y = var_3689_to_fp16)[name = tensor<string, []>("aw_chunk_351_cast_fp16")];
+            tensor<string, []> var_3692_equation_0 = const()[name = tensor<string, []>("op_3692_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3692_cast_fp16 = einsum(equation = var_3692_equation_0, values = (var_3502_cast_fp16, var_3360_cast_fp16))[name = tensor<string, []>("op_3692_cast_fp16")];
+            tensor<fp16, []> var_3693_to_fp16 = const()[name = tensor<string, []>("op_3693_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_353_cast_fp16 = mul(x = var_3692_cast_fp16, y = var_3693_to_fp16)[name = tensor<string, []>("aw_chunk_353_cast_fp16")];
+            tensor<string, []> var_3696_equation_0 = const()[name = tensor<string, []>("op_3696_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3696_cast_fp16 = einsum(equation = var_3696_equation_0, values = (var_3502_cast_fp16, var_3367_cast_fp16))[name = tensor<string, []>("op_3696_cast_fp16")];
+            tensor<fp16, []> var_3697_to_fp16 = const()[name = tensor<string, []>("op_3697_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_355_cast_fp16 = mul(x = var_3696_cast_fp16, y = var_3697_to_fp16)[name = tensor<string, []>("aw_chunk_355_cast_fp16")];
+            tensor<string, []> var_3700_equation_0 = const()[name = tensor<string, []>("op_3700_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3700_cast_fp16 = einsum(equation = var_3700_equation_0, values = (var_3502_cast_fp16, var_3374_cast_fp16))[name = tensor<string, []>("op_3700_cast_fp16")];
+            tensor<fp16, []> var_3701_to_fp16 = const()[name = tensor<string, []>("op_3701_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_357_cast_fp16 = mul(x = var_3700_cast_fp16, y = var_3701_to_fp16)[name = tensor<string, []>("aw_chunk_357_cast_fp16")];
+            tensor<string, []> var_3704_equation_0 = const()[name = tensor<string, []>("op_3704_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3704_cast_fp16 = einsum(equation = var_3704_equation_0, values = (var_3502_cast_fp16, var_3381_cast_fp16))[name = tensor<string, []>("op_3704_cast_fp16")];
+            tensor<fp16, []> var_3705_to_fp16 = const()[name = tensor<string, []>("op_3705_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_359_cast_fp16 = mul(x = var_3704_cast_fp16, y = var_3705_to_fp16)[name = tensor<string, []>("aw_chunk_359_cast_fp16")];
+            tensor<string, []> var_3708_equation_0 = const()[name = tensor<string, []>("op_3708_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3708_cast_fp16 = einsum(equation = var_3708_equation_0, values = (var_3506_cast_fp16, var_3388_cast_fp16))[name = tensor<string, []>("op_3708_cast_fp16")];
+            tensor<fp16, []> var_3709_to_fp16 = const()[name = tensor<string, []>("op_3709_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_361_cast_fp16 = mul(x = var_3708_cast_fp16, y = var_3709_to_fp16)[name = tensor<string, []>("aw_chunk_361_cast_fp16")];
+            tensor<string, []> var_3712_equation_0 = const()[name = tensor<string, []>("op_3712_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3712_cast_fp16 = einsum(equation = var_3712_equation_0, values = (var_3506_cast_fp16, var_3395_cast_fp16))[name = tensor<string, []>("op_3712_cast_fp16")];
+            tensor<fp16, []> var_3713_to_fp16 = const()[name = tensor<string, []>("op_3713_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_363_cast_fp16 = mul(x = var_3712_cast_fp16, y = var_3713_to_fp16)[name = tensor<string, []>("aw_chunk_363_cast_fp16")];
+            tensor<string, []> var_3716_equation_0 = const()[name = tensor<string, []>("op_3716_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3716_cast_fp16 = einsum(equation = var_3716_equation_0, values = (var_3506_cast_fp16, var_3402_cast_fp16))[name = tensor<string, []>("op_3716_cast_fp16")];
+            tensor<fp16, []> var_3717_to_fp16 = const()[name = tensor<string, []>("op_3717_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_365_cast_fp16 = mul(x = var_3716_cast_fp16, y = var_3717_to_fp16)[name = tensor<string, []>("aw_chunk_365_cast_fp16")];
+            tensor<string, []> var_3720_equation_0 = const()[name = tensor<string, []>("op_3720_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3720_cast_fp16 = einsum(equation = var_3720_equation_0, values = (var_3506_cast_fp16, var_3409_cast_fp16))[name = tensor<string, []>("op_3720_cast_fp16")];
+            tensor<fp16, []> var_3721_to_fp16 = const()[name = tensor<string, []>("op_3721_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_367_cast_fp16 = mul(x = var_3720_cast_fp16, y = var_3721_to_fp16)[name = tensor<string, []>("aw_chunk_367_cast_fp16")];
+            tensor<string, []> var_3724_equation_0 = const()[name = tensor<string, []>("op_3724_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3724_cast_fp16 = einsum(equation = var_3724_equation_0, values = (var_3510_cast_fp16, var_3416_cast_fp16))[name = tensor<string, []>("op_3724_cast_fp16")];
+            tensor<fp16, []> var_3725_to_fp16 = const()[name = tensor<string, []>("op_3725_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_369_cast_fp16 = mul(x = var_3724_cast_fp16, y = var_3725_to_fp16)[name = tensor<string, []>("aw_chunk_369_cast_fp16")];
+            tensor<string, []> var_3728_equation_0 = const()[name = tensor<string, []>("op_3728_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3728_cast_fp16 = einsum(equation = var_3728_equation_0, values = (var_3510_cast_fp16, var_3423_cast_fp16))[name = tensor<string, []>("op_3728_cast_fp16")];
+            tensor<fp16, []> var_3729_to_fp16 = const()[name = tensor<string, []>("op_3729_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_371_cast_fp16 = mul(x = var_3728_cast_fp16, y = var_3729_to_fp16)[name = tensor<string, []>("aw_chunk_371_cast_fp16")];
+            tensor<string, []> var_3732_equation_0 = const()[name = tensor<string, []>("op_3732_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3732_cast_fp16 = einsum(equation = var_3732_equation_0, values = (var_3510_cast_fp16, var_3430_cast_fp16))[name = tensor<string, []>("op_3732_cast_fp16")];
+            tensor<fp16, []> var_3733_to_fp16 = const()[name = tensor<string, []>("op_3733_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_373_cast_fp16 = mul(x = var_3732_cast_fp16, y = var_3733_to_fp16)[name = tensor<string, []>("aw_chunk_373_cast_fp16")];
+            tensor<string, []> var_3736_equation_0 = const()[name = tensor<string, []>("op_3736_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3736_cast_fp16 = einsum(equation = var_3736_equation_0, values = (var_3510_cast_fp16, var_3437_cast_fp16))[name = tensor<string, []>("op_3736_cast_fp16")];
+            tensor<fp16, []> var_3737_to_fp16 = const()[name = tensor<string, []>("op_3737_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_375_cast_fp16 = mul(x = var_3736_cast_fp16, y = var_3737_to_fp16)[name = tensor<string, []>("aw_chunk_375_cast_fp16")];
+            tensor<string, []> var_3740_equation_0 = const()[name = tensor<string, []>("op_3740_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3740_cast_fp16 = einsum(equation = var_3740_equation_0, values = (var_3514_cast_fp16, var_3444_cast_fp16))[name = tensor<string, []>("op_3740_cast_fp16")];
+            tensor<fp16, []> var_3741_to_fp16 = const()[name = tensor<string, []>("op_3741_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_377_cast_fp16 = mul(x = var_3740_cast_fp16, y = var_3741_to_fp16)[name = tensor<string, []>("aw_chunk_377_cast_fp16")];
+            tensor<string, []> var_3744_equation_0 = const()[name = tensor<string, []>("op_3744_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3744_cast_fp16 = einsum(equation = var_3744_equation_0, values = (var_3514_cast_fp16, var_3451_cast_fp16))[name = tensor<string, []>("op_3744_cast_fp16")];
+            tensor<fp16, []> var_3745_to_fp16 = const()[name = tensor<string, []>("op_3745_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_379_cast_fp16 = mul(x = var_3744_cast_fp16, y = var_3745_to_fp16)[name = tensor<string, []>("aw_chunk_379_cast_fp16")];
+            tensor<string, []> var_3748_equation_0 = const()[name = tensor<string, []>("op_3748_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3748_cast_fp16 = einsum(equation = var_3748_equation_0, values = (var_3514_cast_fp16, var_3458_cast_fp16))[name = tensor<string, []>("op_3748_cast_fp16")];
+            tensor<fp16, []> var_3749_to_fp16 = const()[name = tensor<string, []>("op_3749_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_381_cast_fp16 = mul(x = var_3748_cast_fp16, y = var_3749_to_fp16)[name = tensor<string, []>("aw_chunk_381_cast_fp16")];
+            tensor<string, []> var_3752_equation_0 = const()[name = tensor<string, []>("op_3752_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3752_cast_fp16 = einsum(equation = var_3752_equation_0, values = (var_3514_cast_fp16, var_3465_cast_fp16))[name = tensor<string, []>("op_3752_cast_fp16")];
+            tensor<fp16, []> var_3753_to_fp16 = const()[name = tensor<string, []>("op_3753_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_383_cast_fp16 = mul(x = var_3752_cast_fp16, y = var_3753_to_fp16)[name = tensor<string, []>("aw_chunk_383_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3755_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_289_cast_fp16)[name = tensor<string, []>("op_3755_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3756_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_291_cast_fp16)[name = tensor<string, []>("op_3756_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3757_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_293_cast_fp16)[name = tensor<string, []>("op_3757_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3758_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_295_cast_fp16)[name = tensor<string, []>("op_3758_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3759_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_297_cast_fp16)[name = tensor<string, []>("op_3759_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3760_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_299_cast_fp16)[name = tensor<string, []>("op_3760_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3761_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_301_cast_fp16)[name = tensor<string, []>("op_3761_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3762_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_303_cast_fp16)[name = tensor<string, []>("op_3762_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3763_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_305_cast_fp16)[name = tensor<string, []>("op_3763_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3764_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_307_cast_fp16)[name = tensor<string, []>("op_3764_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3765_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_309_cast_fp16)[name = tensor<string, []>("op_3765_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3766_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_311_cast_fp16)[name = tensor<string, []>("op_3766_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3767_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_313_cast_fp16)[name = tensor<string, []>("op_3767_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3768_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_315_cast_fp16)[name = tensor<string, []>("op_3768_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3769_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_317_cast_fp16)[name = tensor<string, []>("op_3769_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3770_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_319_cast_fp16)[name = tensor<string, []>("op_3770_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3771_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_321_cast_fp16)[name = tensor<string, []>("op_3771_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3772_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_323_cast_fp16)[name = tensor<string, []>("op_3772_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3773_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_325_cast_fp16)[name = tensor<string, []>("op_3773_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3774_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_327_cast_fp16)[name = tensor<string, []>("op_3774_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3775_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_329_cast_fp16)[name = tensor<string, []>("op_3775_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3776_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_331_cast_fp16)[name = tensor<string, []>("op_3776_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3777_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_333_cast_fp16)[name = tensor<string, []>("op_3777_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3778_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_335_cast_fp16)[name = tensor<string, []>("op_3778_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3779_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_337_cast_fp16)[name = tensor<string, []>("op_3779_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3780_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_339_cast_fp16)[name = tensor<string, []>("op_3780_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3781_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_341_cast_fp16)[name = tensor<string, []>("op_3781_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3782_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_343_cast_fp16)[name = tensor<string, []>("op_3782_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3783_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_345_cast_fp16)[name = tensor<string, []>("op_3783_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3784_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_347_cast_fp16)[name = tensor<string, []>("op_3784_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3785_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_349_cast_fp16)[name = tensor<string, []>("op_3785_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3786_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_351_cast_fp16)[name = tensor<string, []>("op_3786_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3787_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_353_cast_fp16)[name = tensor<string, []>("op_3787_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3788_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_355_cast_fp16)[name = tensor<string, []>("op_3788_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3789_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_357_cast_fp16)[name = tensor<string, []>("op_3789_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3790_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_359_cast_fp16)[name = tensor<string, []>("op_3790_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3791_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_361_cast_fp16)[name = tensor<string, []>("op_3791_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3792_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_363_cast_fp16)[name = tensor<string, []>("op_3792_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3793_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_365_cast_fp16)[name = tensor<string, []>("op_3793_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3794_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_367_cast_fp16)[name = tensor<string, []>("op_3794_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3795_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_369_cast_fp16)[name = tensor<string, []>("op_3795_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3796_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_371_cast_fp16)[name = tensor<string, []>("op_3796_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3797_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_373_cast_fp16)[name = tensor<string, []>("op_3797_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3798_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_375_cast_fp16)[name = tensor<string, []>("op_3798_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3799_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_377_cast_fp16)[name = tensor<string, []>("op_3799_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3800_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_379_cast_fp16)[name = tensor<string, []>("op_3800_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3801_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_381_cast_fp16)[name = tensor<string, []>("op_3801_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_3802_cast_fp16 = softmax(axis = var_3028, x = aw_chunk_383_cast_fp16)[name = tensor<string, []>("op_3802_cast_fp16")];
+            tensor<string, []> var_3804_equation_0 = const()[name = tensor<string, []>("op_3804_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3804_cast_fp16 = einsum(equation = var_3804_equation_0, values = (var_3516_cast_fp16, var_3755_cast_fp16))[name = tensor<string, []>("op_3804_cast_fp16")];
+            tensor<string, []> var_3806_equation_0 = const()[name = tensor<string, []>("op_3806_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3806_cast_fp16 = einsum(equation = var_3806_equation_0, values = (var_3516_cast_fp16, var_3756_cast_fp16))[name = tensor<string, []>("op_3806_cast_fp16")];
+            tensor<string, []> var_3808_equation_0 = const()[name = tensor<string, []>("op_3808_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3808_cast_fp16 = einsum(equation = var_3808_equation_0, values = (var_3516_cast_fp16, var_3757_cast_fp16))[name = tensor<string, []>("op_3808_cast_fp16")];
+            tensor<string, []> var_3810_equation_0 = const()[name = tensor<string, []>("op_3810_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3810_cast_fp16 = einsum(equation = var_3810_equation_0, values = (var_3516_cast_fp16, var_3758_cast_fp16))[name = tensor<string, []>("op_3810_cast_fp16")];
+            tensor<string, []> var_3812_equation_0 = const()[name = tensor<string, []>("op_3812_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3812_cast_fp16 = einsum(equation = var_3812_equation_0, values = (var_3520_cast_fp16, var_3759_cast_fp16))[name = tensor<string, []>("op_3812_cast_fp16")];
+            tensor<string, []> var_3814_equation_0 = const()[name = tensor<string, []>("op_3814_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3814_cast_fp16 = einsum(equation = var_3814_equation_0, values = (var_3520_cast_fp16, var_3760_cast_fp16))[name = tensor<string, []>("op_3814_cast_fp16")];
+            tensor<string, []> var_3816_equation_0 = const()[name = tensor<string, []>("op_3816_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3816_cast_fp16 = einsum(equation = var_3816_equation_0, values = (var_3520_cast_fp16, var_3761_cast_fp16))[name = tensor<string, []>("op_3816_cast_fp16")];
+            tensor<string, []> var_3818_equation_0 = const()[name = tensor<string, []>("op_3818_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3818_cast_fp16 = einsum(equation = var_3818_equation_0, values = (var_3520_cast_fp16, var_3762_cast_fp16))[name = tensor<string, []>("op_3818_cast_fp16")];
+            tensor<string, []> var_3820_equation_0 = const()[name = tensor<string, []>("op_3820_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3820_cast_fp16 = einsum(equation = var_3820_equation_0, values = (var_3524_cast_fp16, var_3763_cast_fp16))[name = tensor<string, []>("op_3820_cast_fp16")];
+            tensor<string, []> var_3822_equation_0 = const()[name = tensor<string, []>("op_3822_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3822_cast_fp16 = einsum(equation = var_3822_equation_0, values = (var_3524_cast_fp16, var_3764_cast_fp16))[name = tensor<string, []>("op_3822_cast_fp16")];
+            tensor<string, []> var_3824_equation_0 = const()[name = tensor<string, []>("op_3824_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3824_cast_fp16 = einsum(equation = var_3824_equation_0, values = (var_3524_cast_fp16, var_3765_cast_fp16))[name = tensor<string, []>("op_3824_cast_fp16")];
+            tensor<string, []> var_3826_equation_0 = const()[name = tensor<string, []>("op_3826_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3826_cast_fp16 = einsum(equation = var_3826_equation_0, values = (var_3524_cast_fp16, var_3766_cast_fp16))[name = tensor<string, []>("op_3826_cast_fp16")];
+            tensor<string, []> var_3828_equation_0 = const()[name = tensor<string, []>("op_3828_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3828_cast_fp16 = einsum(equation = var_3828_equation_0, values = (var_3528_cast_fp16, var_3767_cast_fp16))[name = tensor<string, []>("op_3828_cast_fp16")];
+            tensor<string, []> var_3830_equation_0 = const()[name = tensor<string, []>("op_3830_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3830_cast_fp16 = einsum(equation = var_3830_equation_0, values = (var_3528_cast_fp16, var_3768_cast_fp16))[name = tensor<string, []>("op_3830_cast_fp16")];
+            tensor<string, []> var_3832_equation_0 = const()[name = tensor<string, []>("op_3832_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3832_cast_fp16 = einsum(equation = var_3832_equation_0, values = (var_3528_cast_fp16, var_3769_cast_fp16))[name = tensor<string, []>("op_3832_cast_fp16")];
+            tensor<string, []> var_3834_equation_0 = const()[name = tensor<string, []>("op_3834_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3834_cast_fp16 = einsum(equation = var_3834_equation_0, values = (var_3528_cast_fp16, var_3770_cast_fp16))[name = tensor<string, []>("op_3834_cast_fp16")];
+            tensor<string, []> var_3836_equation_0 = const()[name = tensor<string, []>("op_3836_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3836_cast_fp16 = einsum(equation = var_3836_equation_0, values = (var_3532_cast_fp16, var_3771_cast_fp16))[name = tensor<string, []>("op_3836_cast_fp16")];
+            tensor<string, []> var_3838_equation_0 = const()[name = tensor<string, []>("op_3838_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3838_cast_fp16 = einsum(equation = var_3838_equation_0, values = (var_3532_cast_fp16, var_3772_cast_fp16))[name = tensor<string, []>("op_3838_cast_fp16")];
+            tensor<string, []> var_3840_equation_0 = const()[name = tensor<string, []>("op_3840_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3840_cast_fp16 = einsum(equation = var_3840_equation_0, values = (var_3532_cast_fp16, var_3773_cast_fp16))[name = tensor<string, []>("op_3840_cast_fp16")];
+            tensor<string, []> var_3842_equation_0 = const()[name = tensor<string, []>("op_3842_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3842_cast_fp16 = einsum(equation = var_3842_equation_0, values = (var_3532_cast_fp16, var_3774_cast_fp16))[name = tensor<string, []>("op_3842_cast_fp16")];
+            tensor<string, []> var_3844_equation_0 = const()[name = tensor<string, []>("op_3844_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3844_cast_fp16 = einsum(equation = var_3844_equation_0, values = (var_3536_cast_fp16, var_3775_cast_fp16))[name = tensor<string, []>("op_3844_cast_fp16")];
+            tensor<string, []> var_3846_equation_0 = const()[name = tensor<string, []>("op_3846_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3846_cast_fp16 = einsum(equation = var_3846_equation_0, values = (var_3536_cast_fp16, var_3776_cast_fp16))[name = tensor<string, []>("op_3846_cast_fp16")];
+            tensor<string, []> var_3848_equation_0 = const()[name = tensor<string, []>("op_3848_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3848_cast_fp16 = einsum(equation = var_3848_equation_0, values = (var_3536_cast_fp16, var_3777_cast_fp16))[name = tensor<string, []>("op_3848_cast_fp16")];
+            tensor<string, []> var_3850_equation_0 = const()[name = tensor<string, []>("op_3850_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3850_cast_fp16 = einsum(equation = var_3850_equation_0, values = (var_3536_cast_fp16, var_3778_cast_fp16))[name = tensor<string, []>("op_3850_cast_fp16")];
+            tensor<string, []> var_3852_equation_0 = const()[name = tensor<string, []>("op_3852_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3852_cast_fp16 = einsum(equation = var_3852_equation_0, values = (var_3540_cast_fp16, var_3779_cast_fp16))[name = tensor<string, []>("op_3852_cast_fp16")];
+            tensor<string, []> var_3854_equation_0 = const()[name = tensor<string, []>("op_3854_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3854_cast_fp16 = einsum(equation = var_3854_equation_0, values = (var_3540_cast_fp16, var_3780_cast_fp16))[name = tensor<string, []>("op_3854_cast_fp16")];
+            tensor<string, []> var_3856_equation_0 = const()[name = tensor<string, []>("op_3856_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3856_cast_fp16 = einsum(equation = var_3856_equation_0, values = (var_3540_cast_fp16, var_3781_cast_fp16))[name = tensor<string, []>("op_3856_cast_fp16")];
+            tensor<string, []> var_3858_equation_0 = const()[name = tensor<string, []>("op_3858_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3858_cast_fp16 = einsum(equation = var_3858_equation_0, values = (var_3540_cast_fp16, var_3782_cast_fp16))[name = tensor<string, []>("op_3858_cast_fp16")];
+            tensor<string, []> var_3860_equation_0 = const()[name = tensor<string, []>("op_3860_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3860_cast_fp16 = einsum(equation = var_3860_equation_0, values = (var_3544_cast_fp16, var_3783_cast_fp16))[name = tensor<string, []>("op_3860_cast_fp16")];
+            tensor<string, []> var_3862_equation_0 = const()[name = tensor<string, []>("op_3862_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3862_cast_fp16 = einsum(equation = var_3862_equation_0, values = (var_3544_cast_fp16, var_3784_cast_fp16))[name = tensor<string, []>("op_3862_cast_fp16")];
+            tensor<string, []> var_3864_equation_0 = const()[name = tensor<string, []>("op_3864_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3864_cast_fp16 = einsum(equation = var_3864_equation_0, values = (var_3544_cast_fp16, var_3785_cast_fp16))[name = tensor<string, []>("op_3864_cast_fp16")];
+            tensor<string, []> var_3866_equation_0 = const()[name = tensor<string, []>("op_3866_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3866_cast_fp16 = einsum(equation = var_3866_equation_0, values = (var_3544_cast_fp16, var_3786_cast_fp16))[name = tensor<string, []>("op_3866_cast_fp16")];
+            tensor<string, []> var_3868_equation_0 = const()[name = tensor<string, []>("op_3868_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3868_cast_fp16 = einsum(equation = var_3868_equation_0, values = (var_3548_cast_fp16, var_3787_cast_fp16))[name = tensor<string, []>("op_3868_cast_fp16")];
+            tensor<string, []> var_3870_equation_0 = const()[name = tensor<string, []>("op_3870_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3870_cast_fp16 = einsum(equation = var_3870_equation_0, values = (var_3548_cast_fp16, var_3788_cast_fp16))[name = tensor<string, []>("op_3870_cast_fp16")];
+            tensor<string, []> var_3872_equation_0 = const()[name = tensor<string, []>("op_3872_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3872_cast_fp16 = einsum(equation = var_3872_equation_0, values = (var_3548_cast_fp16, var_3789_cast_fp16))[name = tensor<string, []>("op_3872_cast_fp16")];
+            tensor<string, []> var_3874_equation_0 = const()[name = tensor<string, []>("op_3874_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3874_cast_fp16 = einsum(equation = var_3874_equation_0, values = (var_3548_cast_fp16, var_3790_cast_fp16))[name = tensor<string, []>("op_3874_cast_fp16")];
+            tensor<string, []> var_3876_equation_0 = const()[name = tensor<string, []>("op_3876_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3876_cast_fp16 = einsum(equation = var_3876_equation_0, values = (var_3552_cast_fp16, var_3791_cast_fp16))[name = tensor<string, []>("op_3876_cast_fp16")];
+            tensor<string, []> var_3878_equation_0 = const()[name = tensor<string, []>("op_3878_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3878_cast_fp16 = einsum(equation = var_3878_equation_0, values = (var_3552_cast_fp16, var_3792_cast_fp16))[name = tensor<string, []>("op_3878_cast_fp16")];
+            tensor<string, []> var_3880_equation_0 = const()[name = tensor<string, []>("op_3880_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3880_cast_fp16 = einsum(equation = var_3880_equation_0, values = (var_3552_cast_fp16, var_3793_cast_fp16))[name = tensor<string, []>("op_3880_cast_fp16")];
+            tensor<string, []> var_3882_equation_0 = const()[name = tensor<string, []>("op_3882_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3882_cast_fp16 = einsum(equation = var_3882_equation_0, values = (var_3552_cast_fp16, var_3794_cast_fp16))[name = tensor<string, []>("op_3882_cast_fp16")];
+            tensor<string, []> var_3884_equation_0 = const()[name = tensor<string, []>("op_3884_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3884_cast_fp16 = einsum(equation = var_3884_equation_0, values = (var_3556_cast_fp16, var_3795_cast_fp16))[name = tensor<string, []>("op_3884_cast_fp16")];
+            tensor<string, []> var_3886_equation_0 = const()[name = tensor<string, []>("op_3886_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3886_cast_fp16 = einsum(equation = var_3886_equation_0, values = (var_3556_cast_fp16, var_3796_cast_fp16))[name = tensor<string, []>("op_3886_cast_fp16")];
+            tensor<string, []> var_3888_equation_0 = const()[name = tensor<string, []>("op_3888_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3888_cast_fp16 = einsum(equation = var_3888_equation_0, values = (var_3556_cast_fp16, var_3797_cast_fp16))[name = tensor<string, []>("op_3888_cast_fp16")];
+            tensor<string, []> var_3890_equation_0 = const()[name = tensor<string, []>("op_3890_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3890_cast_fp16 = einsum(equation = var_3890_equation_0, values = (var_3556_cast_fp16, var_3798_cast_fp16))[name = tensor<string, []>("op_3890_cast_fp16")];
+            tensor<string, []> var_3892_equation_0 = const()[name = tensor<string, []>("op_3892_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3892_cast_fp16 = einsum(equation = var_3892_equation_0, values = (var_3560_cast_fp16, var_3799_cast_fp16))[name = tensor<string, []>("op_3892_cast_fp16")];
+            tensor<string, []> var_3894_equation_0 = const()[name = tensor<string, []>("op_3894_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3894_cast_fp16 = einsum(equation = var_3894_equation_0, values = (var_3560_cast_fp16, var_3800_cast_fp16))[name = tensor<string, []>("op_3894_cast_fp16")];
+            tensor<string, []> var_3896_equation_0 = const()[name = tensor<string, []>("op_3896_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3896_cast_fp16 = einsum(equation = var_3896_equation_0, values = (var_3560_cast_fp16, var_3801_cast_fp16))[name = tensor<string, []>("op_3896_cast_fp16")];
+            tensor<string, []> var_3898_equation_0 = const()[name = tensor<string, []>("op_3898_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_3898_cast_fp16 = einsum(equation = var_3898_equation_0, values = (var_3560_cast_fp16, var_3802_cast_fp16))[name = tensor<string, []>("op_3898_cast_fp16")];
+            tensor<bool, []> var_3900_interleave_0 = const()[name = tensor<string, []>("op_3900_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3900_cast_fp16 = concat(axis = var_3011, interleave = var_3900_interleave_0, values = (var_3804_cast_fp16, var_3806_cast_fp16, var_3808_cast_fp16, var_3810_cast_fp16))[name = tensor<string, []>("op_3900_cast_fp16")];
+            tensor<bool, []> var_3902_interleave_0 = const()[name = tensor<string, []>("op_3902_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3902_cast_fp16 = concat(axis = var_3011, interleave = var_3902_interleave_0, values = (var_3812_cast_fp16, var_3814_cast_fp16, var_3816_cast_fp16, var_3818_cast_fp16))[name = tensor<string, []>("op_3902_cast_fp16")];
+            tensor<bool, []> var_3904_interleave_0 = const()[name = tensor<string, []>("op_3904_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3904_cast_fp16 = concat(axis = var_3011, interleave = var_3904_interleave_0, values = (var_3820_cast_fp16, var_3822_cast_fp16, var_3824_cast_fp16, var_3826_cast_fp16))[name = tensor<string, []>("op_3904_cast_fp16")];
+            tensor<bool, []> var_3906_interleave_0 = const()[name = tensor<string, []>("op_3906_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3906_cast_fp16 = concat(axis = var_3011, interleave = var_3906_interleave_0, values = (var_3828_cast_fp16, var_3830_cast_fp16, var_3832_cast_fp16, var_3834_cast_fp16))[name = tensor<string, []>("op_3906_cast_fp16")];
+            tensor<bool, []> var_3908_interleave_0 = const()[name = tensor<string, []>("op_3908_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3908_cast_fp16 = concat(axis = var_3011, interleave = var_3908_interleave_0, values = (var_3836_cast_fp16, var_3838_cast_fp16, var_3840_cast_fp16, var_3842_cast_fp16))[name = tensor<string, []>("op_3908_cast_fp16")];
+            tensor<bool, []> var_3910_interleave_0 = const()[name = tensor<string, []>("op_3910_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3910_cast_fp16 = concat(axis = var_3011, interleave = var_3910_interleave_0, values = (var_3844_cast_fp16, var_3846_cast_fp16, var_3848_cast_fp16, var_3850_cast_fp16))[name = tensor<string, []>("op_3910_cast_fp16")];
+            tensor<bool, []> var_3912_interleave_0 = const()[name = tensor<string, []>("op_3912_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3912_cast_fp16 = concat(axis = var_3011, interleave = var_3912_interleave_0, values = (var_3852_cast_fp16, var_3854_cast_fp16, var_3856_cast_fp16, var_3858_cast_fp16))[name = tensor<string, []>("op_3912_cast_fp16")];
+            tensor<bool, []> var_3914_interleave_0 = const()[name = tensor<string, []>("op_3914_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3914_cast_fp16 = concat(axis = var_3011, interleave = var_3914_interleave_0, values = (var_3860_cast_fp16, var_3862_cast_fp16, var_3864_cast_fp16, var_3866_cast_fp16))[name = tensor<string, []>("op_3914_cast_fp16")];
+            tensor<bool, []> var_3916_interleave_0 = const()[name = tensor<string, []>("op_3916_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3916_cast_fp16 = concat(axis = var_3011, interleave = var_3916_interleave_0, values = (var_3868_cast_fp16, var_3870_cast_fp16, var_3872_cast_fp16, var_3874_cast_fp16))[name = tensor<string, []>("op_3916_cast_fp16")];
+            tensor<bool, []> var_3918_interleave_0 = const()[name = tensor<string, []>("op_3918_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3918_cast_fp16 = concat(axis = var_3011, interleave = var_3918_interleave_0, values = (var_3876_cast_fp16, var_3878_cast_fp16, var_3880_cast_fp16, var_3882_cast_fp16))[name = tensor<string, []>("op_3918_cast_fp16")];
+            tensor<bool, []> var_3920_interleave_0 = const()[name = tensor<string, []>("op_3920_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3920_cast_fp16 = concat(axis = var_3011, interleave = var_3920_interleave_0, values = (var_3884_cast_fp16, var_3886_cast_fp16, var_3888_cast_fp16, var_3890_cast_fp16))[name = tensor<string, []>("op_3920_cast_fp16")];
+            tensor<bool, []> var_3922_interleave_0 = const()[name = tensor<string, []>("op_3922_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3922_cast_fp16 = concat(axis = var_3011, interleave = var_3922_interleave_0, values = (var_3892_cast_fp16, var_3894_cast_fp16, var_3896_cast_fp16, var_3898_cast_fp16))[name = tensor<string, []>("op_3922_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_25_cast_fp16 = concat(axis = var_3028, interleave = input_25_interleave_0, values = (var_3900_cast_fp16, var_3902_cast_fp16, var_3904_cast_fp16, var_3906_cast_fp16, var_3908_cast_fp16, var_3910_cast_fp16, var_3912_cast_fp16, var_3914_cast_fp16, var_3916_cast_fp16, var_3918_cast_fp16, var_3920_cast_fp16, var_3922_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<int32, [2]> var_3927 = const()[name = tensor<string, []>("op_3927"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3929 = const()[name = tensor<string, []>("op_3929"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_15_pad_type_0 = const()[name = tensor<string, []>("obj_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_15_pad_0 = const()[name = tensor<string, []>("obj_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52289280)))];
+            tensor<fp16, [768]> layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53468992)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_15_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_3929, groups = var_3028, pad = obj_15_pad_0, pad_type = obj_15_pad_type_0, strides = var_3927, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("obj_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> var_3935 = const()[name = tensor<string, []>("op_3935"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_15_cast_fp16 = reduce_mean(axes = var_3935, keep_dims = var_3029, x = inputs_15_cast_fp16)[name = tensor<string, []>("channels_mean_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor<string, []>("zero_mean_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor<string, []>("zero_mean_sq_15_cast_fp16")];
+            tensor<int32, [1]> var_3939 = const()[name = tensor<string, []>("op_3939"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_3940_cast_fp16 = reduce_mean(axes = var_3939, keep_dims = var_3029, x = zero_mean_sq_15_cast_fp16)[name = tensor<string, []>("op_3940_cast_fp16")];
+            tensor<fp16, []> var_3941_to_fp16 = const()[name = tensor<string, []>("op_3941_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_3942_cast_fp16 = add(x = var_3940_cast_fp16, y = var_3941_to_fp16)[name = tensor<string, []>("op_3942_cast_fp16")];
+            tensor<fp16, []> denom_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_3942_cast_fp16)[name = tensor<string, []>("denom_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = denom_15_cast_fp16)[name = tensor<string, []>("out_15_cast_fp16")];
+            tensor<fp16, [768]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53470592)))];
+            tensor<fp16, [768]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53472192)))];
+            tensor<fp16, []> input_27_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_27_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<int32, [2]> var_3953 = const()[name = tensor<string, []>("op_3953"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3955 = const()[name = tensor<string, []>("op_3955"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53473792)))];
+            tensor<fp16, [3072]> layers_3_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58192448)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_3955, groups = var_3028, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = var_3953, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<int32, [2]> var_3961 = const()[name = tensor<string, []>("op_3961"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3963 = const()[name = tensor<string, []>("op_3963"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_11_pad_type_0 = const()[name = tensor<string, []>("hidden_states_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_11_pad_0 = const()[name = tensor<string, []>("hidden_states_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58198656)))];
+            tensor<fp16, [768]> layers_3_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62917312)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_11_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_3963, groups = var_3028, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = var_3961, weight = layers_3_fc2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_3970 = const()[name = tensor<string, []>("op_3970"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_3987 = const()[name = tensor<string, []>("op_3987"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_3988 = const()[name = tensor<string, []>("op_3988"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_3998 = const()[name = tensor<string, []>("op_3998"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_17_cast_fp16 = reduce_mean(axes = var_3998, keep_dims = var_3988, x = inputs_17_cast_fp16)[name = tensor<string, []>("channels_mean_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_17_cast_fp16 = sub(x = inputs_17_cast_fp16, y = channels_mean_17_cast_fp16)[name = tensor<string, []>("zero_mean_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = zero_mean_17_cast_fp16)[name = tensor<string, []>("zero_mean_sq_17_cast_fp16")];
+            tensor<int32, [1]> var_4002 = const()[name = tensor<string, []>("op_4002"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_4003_cast_fp16 = reduce_mean(axes = var_4002, keep_dims = var_3988, x = zero_mean_sq_17_cast_fp16)[name = tensor<string, []>("op_4003_cast_fp16")];
+            tensor<fp16, []> var_4004_to_fp16 = const()[name = tensor<string, []>("op_4004_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_4005_cast_fp16 = add(x = var_4003_cast_fp16, y = var_4004_to_fp16)[name = tensor<string, []>("op_4005_cast_fp16")];
+            tensor<fp16, []> denom_17_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_17_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0_to_fp16, x = var_4005_cast_fp16)[name = tensor<string, []>("denom_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = denom_17_cast_fp16)[name = tensor<string, []>("out_17_cast_fp16")];
+            tensor<fp16, [768]> obj_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_17_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62918912)))];
+            tensor<fp16, [768]> obj_17_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_17_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62920512)))];
+            tensor<fp16, []> obj_17_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_17_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor<string, []>("obj_17_cast_fp16")];
+            tensor<int32, [2]> var_4020 = const()[name = tensor<string, []>("op_4020"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4022 = const()[name = tensor<string, []>("op_4022"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_9_pad_type_0 = const()[name = tensor<string, []>("query_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_9_pad_0 = const()[name = tensor<string, []>("query_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62922112)))];
+            tensor<fp16, [768]> layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64101824)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_9_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = var_4022, groups = var_3987, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_4020, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
+            tensor<int32, [2]> var_4026 = const()[name = tensor<string, []>("op_4026"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4028 = const()[name = tensor<string, []>("op_4028"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_9_pad_type_0 = const()[name = tensor<string, []>("key_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_9_pad_0 = const()[name = tensor<string, []>("key_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64103424)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_9_cast_fp16 = conv(dilations = var_4028, groups = var_3987, pad = key_9_pad_0, pad_type = key_9_pad_type_0, strides = var_4026, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor<string, []>("key_9_cast_fp16")];
+            tensor<int32, [2]> var_4033 = const()[name = tensor<string, []>("op_4033"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4035 = const()[name = tensor<string, []>("op_4035"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_9_pad_type_0 = const()[name = tensor<string, []>("value_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_9_pad_0 = const()[name = tensor<string, []>("value_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65283136)))];
+            tensor<fp16, [768]> layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66462848)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = var_4035, groups = var_3987, pad = value_9_pad_0, pad_type = value_9_pad_type_0, strides = var_4033, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor<string, []>("value_9_cast_fp16")];
+            tensor<int32, [4]> var_4042_begin_0 = const()[name = tensor<string, []>("op_4042_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4042_end_0 = const()[name = tensor<string, []>("op_4042_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4042_end_mask_0 = const()[name = tensor<string, []>("op_4042_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4042_cast_fp16 = slice_by_index(begin = var_4042_begin_0, end = var_4042_end_0, end_mask = var_4042_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4042_cast_fp16")];
+            tensor<int32, [4]> var_4046_begin_0 = const()[name = tensor<string, []>("op_4046_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_4046_end_0 = const()[name = tensor<string, []>("op_4046_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_4046_end_mask_0 = const()[name = tensor<string, []>("op_4046_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4046_cast_fp16 = slice_by_index(begin = var_4046_begin_0, end = var_4046_end_0, end_mask = var_4046_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4046_cast_fp16")];
+            tensor<int32, [4]> var_4050_begin_0 = const()[name = tensor<string, []>("op_4050_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_4050_end_0 = const()[name = tensor<string, []>("op_4050_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_4050_end_mask_0 = const()[name = tensor<string, []>("op_4050_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4050_cast_fp16 = slice_by_index(begin = var_4050_begin_0, end = var_4050_end_0, end_mask = var_4050_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4050_cast_fp16")];
+            tensor<int32, [4]> var_4054_begin_0 = const()[name = tensor<string, []>("op_4054_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_4054_end_0 = const()[name = tensor<string, []>("op_4054_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_4054_end_mask_0 = const()[name = tensor<string, []>("op_4054_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4054_cast_fp16 = slice_by_index(begin = var_4054_begin_0, end = var_4054_end_0, end_mask = var_4054_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4054_cast_fp16")];
+            tensor<int32, [4]> var_4058_begin_0 = const()[name = tensor<string, []>("op_4058_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_4058_end_0 = const()[name = tensor<string, []>("op_4058_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_4058_end_mask_0 = const()[name = tensor<string, []>("op_4058_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4058_cast_fp16 = slice_by_index(begin = var_4058_begin_0, end = var_4058_end_0, end_mask = var_4058_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4058_cast_fp16")];
+            tensor<int32, [4]> var_4062_begin_0 = const()[name = tensor<string, []>("op_4062_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_4062_end_0 = const()[name = tensor<string, []>("op_4062_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_4062_end_mask_0 = const()[name = tensor<string, []>("op_4062_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4062_cast_fp16 = slice_by_index(begin = var_4062_begin_0, end = var_4062_end_0, end_mask = var_4062_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4062_cast_fp16")];
+            tensor<int32, [4]> var_4066_begin_0 = const()[name = tensor<string, []>("op_4066_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_4066_end_0 = const()[name = tensor<string, []>("op_4066_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_4066_end_mask_0 = const()[name = tensor<string, []>("op_4066_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4066_cast_fp16 = slice_by_index(begin = var_4066_begin_0, end = var_4066_end_0, end_mask = var_4066_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4066_cast_fp16")];
+            tensor<int32, [4]> var_4070_begin_0 = const()[name = tensor<string, []>("op_4070_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_4070_end_0 = const()[name = tensor<string, []>("op_4070_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_4070_end_mask_0 = const()[name = tensor<string, []>("op_4070_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4070_cast_fp16 = slice_by_index(begin = var_4070_begin_0, end = var_4070_end_0, end_mask = var_4070_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4070_cast_fp16")];
+            tensor<int32, [4]> var_4074_begin_0 = const()[name = tensor<string, []>("op_4074_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_4074_end_0 = const()[name = tensor<string, []>("op_4074_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_4074_end_mask_0 = const()[name = tensor<string, []>("op_4074_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4074_cast_fp16 = slice_by_index(begin = var_4074_begin_0, end = var_4074_end_0, end_mask = var_4074_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4074_cast_fp16")];
+            tensor<int32, [4]> var_4078_begin_0 = const()[name = tensor<string, []>("op_4078_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_4078_end_0 = const()[name = tensor<string, []>("op_4078_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_4078_end_mask_0 = const()[name = tensor<string, []>("op_4078_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4078_cast_fp16 = slice_by_index(begin = var_4078_begin_0, end = var_4078_end_0, end_mask = var_4078_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4078_cast_fp16")];
+            tensor<int32, [4]> var_4082_begin_0 = const()[name = tensor<string, []>("op_4082_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_4082_end_0 = const()[name = tensor<string, []>("op_4082_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_4082_end_mask_0 = const()[name = tensor<string, []>("op_4082_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4082_cast_fp16 = slice_by_index(begin = var_4082_begin_0, end = var_4082_end_0, end_mask = var_4082_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4082_cast_fp16")];
+            tensor<int32, [4]> var_4086_begin_0 = const()[name = tensor<string, []>("op_4086_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_4086_end_0 = const()[name = tensor<string, []>("op_4086_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_4086_end_mask_0 = const()[name = tensor<string, []>("op_4086_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4086_cast_fp16 = slice_by_index(begin = var_4086_begin_0, end = var_4086_end_0, end_mask = var_4086_end_mask_0, x = query_9_cast_fp16)[name = tensor<string, []>("op_4086_cast_fp16")];
+            tensor<int32, [4]> var_4095_begin_0 = const()[name = tensor<string, []>("op_4095_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4095_end_0 = const()[name = tensor<string, []>("op_4095_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4095_end_mask_0 = const()[name = tensor<string, []>("op_4095_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4095_cast_fp16 = slice_by_index(begin = var_4095_begin_0, end = var_4095_end_0, end_mask = var_4095_end_mask_0, x = var_4042_cast_fp16)[name = tensor<string, []>("op_4095_cast_fp16")];
+            tensor<int32, [4]> var_4102_begin_0 = const()[name = tensor<string, []>("op_4102_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4102_end_0 = const()[name = tensor<string, []>("op_4102_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4102_end_mask_0 = const()[name = tensor<string, []>("op_4102_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4102_cast_fp16 = slice_by_index(begin = var_4102_begin_0, end = var_4102_end_0, end_mask = var_4102_end_mask_0, x = var_4042_cast_fp16)[name = tensor<string, []>("op_4102_cast_fp16")];
+            tensor<int32, [4]> var_4109_begin_0 = const()[name = tensor<string, []>("op_4109_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4109_end_0 = const()[name = tensor<string, []>("op_4109_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4109_end_mask_0 = const()[name = tensor<string, []>("op_4109_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4109_cast_fp16 = slice_by_index(begin = var_4109_begin_0, end = var_4109_end_0, end_mask = var_4109_end_mask_0, x = var_4042_cast_fp16)[name = tensor<string, []>("op_4109_cast_fp16")];
+            tensor<int32, [4]> var_4116_begin_0 = const()[name = tensor<string, []>("op_4116_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4116_end_0 = const()[name = tensor<string, []>("op_4116_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4116_end_mask_0 = const()[name = tensor<string, []>("op_4116_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4116_cast_fp16 = slice_by_index(begin = var_4116_begin_0, end = var_4116_end_0, end_mask = var_4116_end_mask_0, x = var_4042_cast_fp16)[name = tensor<string, []>("op_4116_cast_fp16")];
+            tensor<int32, [4]> var_4123_begin_0 = const()[name = tensor<string, []>("op_4123_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4123_end_0 = const()[name = tensor<string, []>("op_4123_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4123_end_mask_0 = const()[name = tensor<string, []>("op_4123_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4123_cast_fp16 = slice_by_index(begin = var_4123_begin_0, end = var_4123_end_0, end_mask = var_4123_end_mask_0, x = var_4046_cast_fp16)[name = tensor<string, []>("op_4123_cast_fp16")];
+            tensor<int32, [4]> var_4130_begin_0 = const()[name = tensor<string, []>("op_4130_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4130_end_0 = const()[name = tensor<string, []>("op_4130_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4130_end_mask_0 = const()[name = tensor<string, []>("op_4130_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4130_cast_fp16 = slice_by_index(begin = var_4130_begin_0, end = var_4130_end_0, end_mask = var_4130_end_mask_0, x = var_4046_cast_fp16)[name = tensor<string, []>("op_4130_cast_fp16")];
+            tensor<int32, [4]> var_4137_begin_0 = const()[name = tensor<string, []>("op_4137_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4137_end_0 = const()[name = tensor<string, []>("op_4137_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4137_end_mask_0 = const()[name = tensor<string, []>("op_4137_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4137_cast_fp16 = slice_by_index(begin = var_4137_begin_0, end = var_4137_end_0, end_mask = var_4137_end_mask_0, x = var_4046_cast_fp16)[name = tensor<string, []>("op_4137_cast_fp16")];
+            tensor<int32, [4]> var_4144_begin_0 = const()[name = tensor<string, []>("op_4144_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4144_end_0 = const()[name = tensor<string, []>("op_4144_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4144_end_mask_0 = const()[name = tensor<string, []>("op_4144_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4144_cast_fp16 = slice_by_index(begin = var_4144_begin_0, end = var_4144_end_0, end_mask = var_4144_end_mask_0, x = var_4046_cast_fp16)[name = tensor<string, []>("op_4144_cast_fp16")];
+            tensor<int32, [4]> var_4151_begin_0 = const()[name = tensor<string, []>("op_4151_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4151_end_0 = const()[name = tensor<string, []>("op_4151_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4151_end_mask_0 = const()[name = tensor<string, []>("op_4151_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4151_cast_fp16 = slice_by_index(begin = var_4151_begin_0, end = var_4151_end_0, end_mask = var_4151_end_mask_0, x = var_4050_cast_fp16)[name = tensor<string, []>("op_4151_cast_fp16")];
+            tensor<int32, [4]> var_4158_begin_0 = const()[name = tensor<string, []>("op_4158_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4158_end_0 = const()[name = tensor<string, []>("op_4158_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4158_end_mask_0 = const()[name = tensor<string, []>("op_4158_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4158_cast_fp16 = slice_by_index(begin = var_4158_begin_0, end = var_4158_end_0, end_mask = var_4158_end_mask_0, x = var_4050_cast_fp16)[name = tensor<string, []>("op_4158_cast_fp16")];
+            tensor<int32, [4]> var_4165_begin_0 = const()[name = tensor<string, []>("op_4165_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4165_end_0 = const()[name = tensor<string, []>("op_4165_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4165_end_mask_0 = const()[name = tensor<string, []>("op_4165_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4165_cast_fp16 = slice_by_index(begin = var_4165_begin_0, end = var_4165_end_0, end_mask = var_4165_end_mask_0, x = var_4050_cast_fp16)[name = tensor<string, []>("op_4165_cast_fp16")];
+            tensor<int32, [4]> var_4172_begin_0 = const()[name = tensor<string, []>("op_4172_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4172_end_0 = const()[name = tensor<string, []>("op_4172_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4172_end_mask_0 = const()[name = tensor<string, []>("op_4172_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4172_cast_fp16 = slice_by_index(begin = var_4172_begin_0, end = var_4172_end_0, end_mask = var_4172_end_mask_0, x = var_4050_cast_fp16)[name = tensor<string, []>("op_4172_cast_fp16")];
+            tensor<int32, [4]> var_4179_begin_0 = const()[name = tensor<string, []>("op_4179_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4179_end_0 = const()[name = tensor<string, []>("op_4179_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4179_end_mask_0 = const()[name = tensor<string, []>("op_4179_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4179_cast_fp16 = slice_by_index(begin = var_4179_begin_0, end = var_4179_end_0, end_mask = var_4179_end_mask_0, x = var_4054_cast_fp16)[name = tensor<string, []>("op_4179_cast_fp16")];
+            tensor<int32, [4]> var_4186_begin_0 = const()[name = tensor<string, []>("op_4186_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4186_end_0 = const()[name = tensor<string, []>("op_4186_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4186_end_mask_0 = const()[name = tensor<string, []>("op_4186_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4186_cast_fp16 = slice_by_index(begin = var_4186_begin_0, end = var_4186_end_0, end_mask = var_4186_end_mask_0, x = var_4054_cast_fp16)[name = tensor<string, []>("op_4186_cast_fp16")];
+            tensor<int32, [4]> var_4193_begin_0 = const()[name = tensor<string, []>("op_4193_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4193_end_0 = const()[name = tensor<string, []>("op_4193_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4193_end_mask_0 = const()[name = tensor<string, []>("op_4193_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4193_cast_fp16 = slice_by_index(begin = var_4193_begin_0, end = var_4193_end_0, end_mask = var_4193_end_mask_0, x = var_4054_cast_fp16)[name = tensor<string, []>("op_4193_cast_fp16")];
+            tensor<int32, [4]> var_4200_begin_0 = const()[name = tensor<string, []>("op_4200_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4200_end_0 = const()[name = tensor<string, []>("op_4200_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4200_end_mask_0 = const()[name = tensor<string, []>("op_4200_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4200_cast_fp16 = slice_by_index(begin = var_4200_begin_0, end = var_4200_end_0, end_mask = var_4200_end_mask_0, x = var_4054_cast_fp16)[name = tensor<string, []>("op_4200_cast_fp16")];
+            tensor<int32, [4]> var_4207_begin_0 = const()[name = tensor<string, []>("op_4207_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4207_end_0 = const()[name = tensor<string, []>("op_4207_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4207_end_mask_0 = const()[name = tensor<string, []>("op_4207_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4207_cast_fp16 = slice_by_index(begin = var_4207_begin_0, end = var_4207_end_0, end_mask = var_4207_end_mask_0, x = var_4058_cast_fp16)[name = tensor<string, []>("op_4207_cast_fp16")];
+            tensor<int32, [4]> var_4214_begin_0 = const()[name = tensor<string, []>("op_4214_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4214_end_0 = const()[name = tensor<string, []>("op_4214_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4214_end_mask_0 = const()[name = tensor<string, []>("op_4214_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4214_cast_fp16 = slice_by_index(begin = var_4214_begin_0, end = var_4214_end_0, end_mask = var_4214_end_mask_0, x = var_4058_cast_fp16)[name = tensor<string, []>("op_4214_cast_fp16")];
+            tensor<int32, [4]> var_4221_begin_0 = const()[name = tensor<string, []>("op_4221_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4221_end_0 = const()[name = tensor<string, []>("op_4221_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4221_end_mask_0 = const()[name = tensor<string, []>("op_4221_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4221_cast_fp16 = slice_by_index(begin = var_4221_begin_0, end = var_4221_end_0, end_mask = var_4221_end_mask_0, x = var_4058_cast_fp16)[name = tensor<string, []>("op_4221_cast_fp16")];
+            tensor<int32, [4]> var_4228_begin_0 = const()[name = tensor<string, []>("op_4228_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4228_end_0 = const()[name = tensor<string, []>("op_4228_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4228_end_mask_0 = const()[name = tensor<string, []>("op_4228_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4228_cast_fp16 = slice_by_index(begin = var_4228_begin_0, end = var_4228_end_0, end_mask = var_4228_end_mask_0, x = var_4058_cast_fp16)[name = tensor<string, []>("op_4228_cast_fp16")];
+            tensor<int32, [4]> var_4235_begin_0 = const()[name = tensor<string, []>("op_4235_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4235_end_0 = const()[name = tensor<string, []>("op_4235_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4235_end_mask_0 = const()[name = tensor<string, []>("op_4235_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4235_cast_fp16 = slice_by_index(begin = var_4235_begin_0, end = var_4235_end_0, end_mask = var_4235_end_mask_0, x = var_4062_cast_fp16)[name = tensor<string, []>("op_4235_cast_fp16")];
+            tensor<int32, [4]> var_4242_begin_0 = const()[name = tensor<string, []>("op_4242_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4242_end_0 = const()[name = tensor<string, []>("op_4242_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4242_end_mask_0 = const()[name = tensor<string, []>("op_4242_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4242_cast_fp16 = slice_by_index(begin = var_4242_begin_0, end = var_4242_end_0, end_mask = var_4242_end_mask_0, x = var_4062_cast_fp16)[name = tensor<string, []>("op_4242_cast_fp16")];
+            tensor<int32, [4]> var_4249_begin_0 = const()[name = tensor<string, []>("op_4249_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4249_end_0 = const()[name = tensor<string, []>("op_4249_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4249_end_mask_0 = const()[name = tensor<string, []>("op_4249_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4249_cast_fp16 = slice_by_index(begin = var_4249_begin_0, end = var_4249_end_0, end_mask = var_4249_end_mask_0, x = var_4062_cast_fp16)[name = tensor<string, []>("op_4249_cast_fp16")];
+            tensor<int32, [4]> var_4256_begin_0 = const()[name = tensor<string, []>("op_4256_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4256_end_0 = const()[name = tensor<string, []>("op_4256_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4256_end_mask_0 = const()[name = tensor<string, []>("op_4256_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4256_cast_fp16 = slice_by_index(begin = var_4256_begin_0, end = var_4256_end_0, end_mask = var_4256_end_mask_0, x = var_4062_cast_fp16)[name = tensor<string, []>("op_4256_cast_fp16")];
+            tensor<int32, [4]> var_4263_begin_0 = const()[name = tensor<string, []>("op_4263_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4263_end_0 = const()[name = tensor<string, []>("op_4263_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4263_end_mask_0 = const()[name = tensor<string, []>("op_4263_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4263_cast_fp16 = slice_by_index(begin = var_4263_begin_0, end = var_4263_end_0, end_mask = var_4263_end_mask_0, x = var_4066_cast_fp16)[name = tensor<string, []>("op_4263_cast_fp16")];
+            tensor<int32, [4]> var_4270_begin_0 = const()[name = tensor<string, []>("op_4270_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4270_end_0 = const()[name = tensor<string, []>("op_4270_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4270_end_mask_0 = const()[name = tensor<string, []>("op_4270_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4270_cast_fp16 = slice_by_index(begin = var_4270_begin_0, end = var_4270_end_0, end_mask = var_4270_end_mask_0, x = var_4066_cast_fp16)[name = tensor<string, []>("op_4270_cast_fp16")];
+            tensor<int32, [4]> var_4277_begin_0 = const()[name = tensor<string, []>("op_4277_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4277_end_0 = const()[name = tensor<string, []>("op_4277_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4277_end_mask_0 = const()[name = tensor<string, []>("op_4277_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4277_cast_fp16 = slice_by_index(begin = var_4277_begin_0, end = var_4277_end_0, end_mask = var_4277_end_mask_0, x = var_4066_cast_fp16)[name = tensor<string, []>("op_4277_cast_fp16")];
+            tensor<int32, [4]> var_4284_begin_0 = const()[name = tensor<string, []>("op_4284_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4284_end_0 = const()[name = tensor<string, []>("op_4284_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4284_end_mask_0 = const()[name = tensor<string, []>("op_4284_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4284_cast_fp16 = slice_by_index(begin = var_4284_begin_0, end = var_4284_end_0, end_mask = var_4284_end_mask_0, x = var_4066_cast_fp16)[name = tensor<string, []>("op_4284_cast_fp16")];
+            tensor<int32, [4]> var_4291_begin_0 = const()[name = tensor<string, []>("op_4291_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4291_end_0 = const()[name = tensor<string, []>("op_4291_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4291_end_mask_0 = const()[name = tensor<string, []>("op_4291_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4291_cast_fp16 = slice_by_index(begin = var_4291_begin_0, end = var_4291_end_0, end_mask = var_4291_end_mask_0, x = var_4070_cast_fp16)[name = tensor<string, []>("op_4291_cast_fp16")];
+            tensor<int32, [4]> var_4298_begin_0 = const()[name = tensor<string, []>("op_4298_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4298_end_0 = const()[name = tensor<string, []>("op_4298_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4298_end_mask_0 = const()[name = tensor<string, []>("op_4298_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4298_cast_fp16 = slice_by_index(begin = var_4298_begin_0, end = var_4298_end_0, end_mask = var_4298_end_mask_0, x = var_4070_cast_fp16)[name = tensor<string, []>("op_4298_cast_fp16")];
+            tensor<int32, [4]> var_4305_begin_0 = const()[name = tensor<string, []>("op_4305_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4305_end_0 = const()[name = tensor<string, []>("op_4305_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4305_end_mask_0 = const()[name = tensor<string, []>("op_4305_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4305_cast_fp16 = slice_by_index(begin = var_4305_begin_0, end = var_4305_end_0, end_mask = var_4305_end_mask_0, x = var_4070_cast_fp16)[name = tensor<string, []>("op_4305_cast_fp16")];
+            tensor<int32, [4]> var_4312_begin_0 = const()[name = tensor<string, []>("op_4312_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4312_end_0 = const()[name = tensor<string, []>("op_4312_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4312_end_mask_0 = const()[name = tensor<string, []>("op_4312_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4312_cast_fp16 = slice_by_index(begin = var_4312_begin_0, end = var_4312_end_0, end_mask = var_4312_end_mask_0, x = var_4070_cast_fp16)[name = tensor<string, []>("op_4312_cast_fp16")];
+            tensor<int32, [4]> var_4319_begin_0 = const()[name = tensor<string, []>("op_4319_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4319_end_0 = const()[name = tensor<string, []>("op_4319_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4319_end_mask_0 = const()[name = tensor<string, []>("op_4319_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4319_cast_fp16 = slice_by_index(begin = var_4319_begin_0, end = var_4319_end_0, end_mask = var_4319_end_mask_0, x = var_4074_cast_fp16)[name = tensor<string, []>("op_4319_cast_fp16")];
+            tensor<int32, [4]> var_4326_begin_0 = const()[name = tensor<string, []>("op_4326_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4326_end_0 = const()[name = tensor<string, []>("op_4326_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4326_end_mask_0 = const()[name = tensor<string, []>("op_4326_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4326_cast_fp16 = slice_by_index(begin = var_4326_begin_0, end = var_4326_end_0, end_mask = var_4326_end_mask_0, x = var_4074_cast_fp16)[name = tensor<string, []>("op_4326_cast_fp16")];
+            tensor<int32, [4]> var_4333_begin_0 = const()[name = tensor<string, []>("op_4333_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4333_end_0 = const()[name = tensor<string, []>("op_4333_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4333_end_mask_0 = const()[name = tensor<string, []>("op_4333_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4333_cast_fp16 = slice_by_index(begin = var_4333_begin_0, end = var_4333_end_0, end_mask = var_4333_end_mask_0, x = var_4074_cast_fp16)[name = tensor<string, []>("op_4333_cast_fp16")];
+            tensor<int32, [4]> var_4340_begin_0 = const()[name = tensor<string, []>("op_4340_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4340_end_0 = const()[name = tensor<string, []>("op_4340_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4340_end_mask_0 = const()[name = tensor<string, []>("op_4340_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4340_cast_fp16 = slice_by_index(begin = var_4340_begin_0, end = var_4340_end_0, end_mask = var_4340_end_mask_0, x = var_4074_cast_fp16)[name = tensor<string, []>("op_4340_cast_fp16")];
+            tensor<int32, [4]> var_4347_begin_0 = const()[name = tensor<string, []>("op_4347_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4347_end_0 = const()[name = tensor<string, []>("op_4347_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4347_end_mask_0 = const()[name = tensor<string, []>("op_4347_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4347_cast_fp16 = slice_by_index(begin = var_4347_begin_0, end = var_4347_end_0, end_mask = var_4347_end_mask_0, x = var_4078_cast_fp16)[name = tensor<string, []>("op_4347_cast_fp16")];
+            tensor<int32, [4]> var_4354_begin_0 = const()[name = tensor<string, []>("op_4354_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4354_end_0 = const()[name = tensor<string, []>("op_4354_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4354_end_mask_0 = const()[name = tensor<string, []>("op_4354_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4354_cast_fp16 = slice_by_index(begin = var_4354_begin_0, end = var_4354_end_0, end_mask = var_4354_end_mask_0, x = var_4078_cast_fp16)[name = tensor<string, []>("op_4354_cast_fp16")];
+            tensor<int32, [4]> var_4361_begin_0 = const()[name = tensor<string, []>("op_4361_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4361_end_0 = const()[name = tensor<string, []>("op_4361_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4361_end_mask_0 = const()[name = tensor<string, []>("op_4361_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4361_cast_fp16 = slice_by_index(begin = var_4361_begin_0, end = var_4361_end_0, end_mask = var_4361_end_mask_0, x = var_4078_cast_fp16)[name = tensor<string, []>("op_4361_cast_fp16")];
+            tensor<int32, [4]> var_4368_begin_0 = const()[name = tensor<string, []>("op_4368_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4368_end_0 = const()[name = tensor<string, []>("op_4368_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4368_end_mask_0 = const()[name = tensor<string, []>("op_4368_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4368_cast_fp16 = slice_by_index(begin = var_4368_begin_0, end = var_4368_end_0, end_mask = var_4368_end_mask_0, x = var_4078_cast_fp16)[name = tensor<string, []>("op_4368_cast_fp16")];
+            tensor<int32, [4]> var_4375_begin_0 = const()[name = tensor<string, []>("op_4375_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4375_end_0 = const()[name = tensor<string, []>("op_4375_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4375_end_mask_0 = const()[name = tensor<string, []>("op_4375_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4375_cast_fp16 = slice_by_index(begin = var_4375_begin_0, end = var_4375_end_0, end_mask = var_4375_end_mask_0, x = var_4082_cast_fp16)[name = tensor<string, []>("op_4375_cast_fp16")];
+            tensor<int32, [4]> var_4382_begin_0 = const()[name = tensor<string, []>("op_4382_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4382_end_0 = const()[name = tensor<string, []>("op_4382_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4382_end_mask_0 = const()[name = tensor<string, []>("op_4382_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4382_cast_fp16 = slice_by_index(begin = var_4382_begin_0, end = var_4382_end_0, end_mask = var_4382_end_mask_0, x = var_4082_cast_fp16)[name = tensor<string, []>("op_4382_cast_fp16")];
+            tensor<int32, [4]> var_4389_begin_0 = const()[name = tensor<string, []>("op_4389_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4389_end_0 = const()[name = tensor<string, []>("op_4389_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4389_end_mask_0 = const()[name = tensor<string, []>("op_4389_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4389_cast_fp16 = slice_by_index(begin = var_4389_begin_0, end = var_4389_end_0, end_mask = var_4389_end_mask_0, x = var_4082_cast_fp16)[name = tensor<string, []>("op_4389_cast_fp16")];
+            tensor<int32, [4]> var_4396_begin_0 = const()[name = tensor<string, []>("op_4396_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4396_end_0 = const()[name = tensor<string, []>("op_4396_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4396_end_mask_0 = const()[name = tensor<string, []>("op_4396_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4396_cast_fp16 = slice_by_index(begin = var_4396_begin_0, end = var_4396_end_0, end_mask = var_4396_end_mask_0, x = var_4082_cast_fp16)[name = tensor<string, []>("op_4396_cast_fp16")];
+            tensor<int32, [4]> var_4403_begin_0 = const()[name = tensor<string, []>("op_4403_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4403_end_0 = const()[name = tensor<string, []>("op_4403_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_4403_end_mask_0 = const()[name = tensor<string, []>("op_4403_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4403_cast_fp16 = slice_by_index(begin = var_4403_begin_0, end = var_4403_end_0, end_mask = var_4403_end_mask_0, x = var_4086_cast_fp16)[name = tensor<string, []>("op_4403_cast_fp16")];
+            tensor<int32, [4]> var_4410_begin_0 = const()[name = tensor<string, []>("op_4410_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_4410_end_0 = const()[name = tensor<string, []>("op_4410_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_4410_end_mask_0 = const()[name = tensor<string, []>("op_4410_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4410_cast_fp16 = slice_by_index(begin = var_4410_begin_0, end = var_4410_end_0, end_mask = var_4410_end_mask_0, x = var_4086_cast_fp16)[name = tensor<string, []>("op_4410_cast_fp16")];
+            tensor<int32, [4]> var_4417_begin_0 = const()[name = tensor<string, []>("op_4417_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_4417_end_0 = const()[name = tensor<string, []>("op_4417_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_4417_end_mask_0 = const()[name = tensor<string, []>("op_4417_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4417_cast_fp16 = slice_by_index(begin = var_4417_begin_0, end = var_4417_end_0, end_mask = var_4417_end_mask_0, x = var_4086_cast_fp16)[name = tensor<string, []>("op_4417_cast_fp16")];
+            tensor<int32, [4]> var_4424_begin_0 = const()[name = tensor<string, []>("op_4424_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_4424_end_0 = const()[name = tensor<string, []>("op_4424_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4424_end_mask_0 = const()[name = tensor<string, []>("op_4424_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_4424_cast_fp16 = slice_by_index(begin = var_4424_begin_0, end = var_4424_end_0, end_mask = var_4424_end_mask_0, x = var_4086_cast_fp16)[name = tensor<string, []>("op_4424_cast_fp16")];
+            tensor<int32, [4]> k_9_perm_0 = const()[name = tensor<string, []>("k_9_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_4429_begin_0 = const()[name = tensor<string, []>("op_4429_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4429_end_0 = const()[name = tensor<string, []>("op_4429_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_4429_end_mask_0 = const()[name = tensor<string, []>("op_4429_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_7 = transpose(perm = k_9_perm_0, x = key_9_cast_fp16)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4429_cast_fp16 = slice_by_index(begin = var_4429_begin_0, end = var_4429_end_0, end_mask = var_4429_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4429_cast_fp16")];
+            tensor<int32, [4]> var_4433_begin_0 = const()[name = tensor<string, []>("op_4433_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_4433_end_0 = const()[name = tensor<string, []>("op_4433_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_4433_end_mask_0 = const()[name = tensor<string, []>("op_4433_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4433_cast_fp16 = slice_by_index(begin = var_4433_begin_0, end = var_4433_end_0, end_mask = var_4433_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4433_cast_fp16")];
+            tensor<int32, [4]> var_4437_begin_0 = const()[name = tensor<string, []>("op_4437_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_4437_end_0 = const()[name = tensor<string, []>("op_4437_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_4437_end_mask_0 = const()[name = tensor<string, []>("op_4437_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4437_cast_fp16 = slice_by_index(begin = var_4437_begin_0, end = var_4437_end_0, end_mask = var_4437_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4437_cast_fp16")];
+            tensor<int32, [4]> var_4441_begin_0 = const()[name = tensor<string, []>("op_4441_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_4441_end_0 = const()[name = tensor<string, []>("op_4441_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_4441_end_mask_0 = const()[name = tensor<string, []>("op_4441_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4441_cast_fp16 = slice_by_index(begin = var_4441_begin_0, end = var_4441_end_0, end_mask = var_4441_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4441_cast_fp16")];
+            tensor<int32, [4]> var_4445_begin_0 = const()[name = tensor<string, []>("op_4445_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_4445_end_0 = const()[name = tensor<string, []>("op_4445_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_4445_end_mask_0 = const()[name = tensor<string, []>("op_4445_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4445_cast_fp16 = slice_by_index(begin = var_4445_begin_0, end = var_4445_end_0, end_mask = var_4445_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4445_cast_fp16")];
+            tensor<int32, [4]> var_4449_begin_0 = const()[name = tensor<string, []>("op_4449_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_4449_end_0 = const()[name = tensor<string, []>("op_4449_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_4449_end_mask_0 = const()[name = tensor<string, []>("op_4449_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4449_cast_fp16 = slice_by_index(begin = var_4449_begin_0, end = var_4449_end_0, end_mask = var_4449_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4449_cast_fp16")];
+            tensor<int32, [4]> var_4453_begin_0 = const()[name = tensor<string, []>("op_4453_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_4453_end_0 = const()[name = tensor<string, []>("op_4453_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_4453_end_mask_0 = const()[name = tensor<string, []>("op_4453_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4453_cast_fp16 = slice_by_index(begin = var_4453_begin_0, end = var_4453_end_0, end_mask = var_4453_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4453_cast_fp16")];
+            tensor<int32, [4]> var_4457_begin_0 = const()[name = tensor<string, []>("op_4457_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_4457_end_0 = const()[name = tensor<string, []>("op_4457_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_4457_end_mask_0 = const()[name = tensor<string, []>("op_4457_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4457_cast_fp16 = slice_by_index(begin = var_4457_begin_0, end = var_4457_end_0, end_mask = var_4457_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4457_cast_fp16")];
+            tensor<int32, [4]> var_4461_begin_0 = const()[name = tensor<string, []>("op_4461_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_4461_end_0 = const()[name = tensor<string, []>("op_4461_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_4461_end_mask_0 = const()[name = tensor<string, []>("op_4461_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4461_cast_fp16 = slice_by_index(begin = var_4461_begin_0, end = var_4461_end_0, end_mask = var_4461_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4461_cast_fp16")];
+            tensor<int32, [4]> var_4465_begin_0 = const()[name = tensor<string, []>("op_4465_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_4465_end_0 = const()[name = tensor<string, []>("op_4465_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_4465_end_mask_0 = const()[name = tensor<string, []>("op_4465_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4465_cast_fp16 = slice_by_index(begin = var_4465_begin_0, end = var_4465_end_0, end_mask = var_4465_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4465_cast_fp16")];
+            tensor<int32, [4]> var_4469_begin_0 = const()[name = tensor<string, []>("op_4469_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_4469_end_0 = const()[name = tensor<string, []>("op_4469_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_4469_end_mask_0 = const()[name = tensor<string, []>("op_4469_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4469_cast_fp16 = slice_by_index(begin = var_4469_begin_0, end = var_4469_end_0, end_mask = var_4469_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4469_cast_fp16")];
+            tensor<int32, [4]> var_4473_begin_0 = const()[name = tensor<string, []>("op_4473_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_4473_end_0 = const()[name = tensor<string, []>("op_4473_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_4473_end_mask_0 = const()[name = tensor<string, []>("op_4473_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_4473_cast_fp16 = slice_by_index(begin = var_4473_begin_0, end = var_4473_end_0, end_mask = var_4473_end_mask_0, x = transpose_7)[name = tensor<string, []>("op_4473_cast_fp16")];
+            tensor<int32, [4]> var_4475_begin_0 = const()[name = tensor<string, []>("op_4475_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_4475_end_0 = const()[name = tensor<string, []>("op_4475_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_4475_end_mask_0 = const()[name = tensor<string, []>("op_4475_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4475_cast_fp16 = slice_by_index(begin = var_4475_begin_0, end = var_4475_end_0, end_mask = var_4475_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4475_cast_fp16")];
+            tensor<int32, [4]> var_4479_begin_0 = const()[name = tensor<string, []>("op_4479_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_4479_end_0 = const()[name = tensor<string, []>("op_4479_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_4479_end_mask_0 = const()[name = tensor<string, []>("op_4479_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4479_cast_fp16 = slice_by_index(begin = var_4479_begin_0, end = var_4479_end_0, end_mask = var_4479_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4479_cast_fp16")];
+            tensor<int32, [4]> var_4483_begin_0 = const()[name = tensor<string, []>("op_4483_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_4483_end_0 = const()[name = tensor<string, []>("op_4483_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_4483_end_mask_0 = const()[name = tensor<string, []>("op_4483_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4483_cast_fp16 = slice_by_index(begin = var_4483_begin_0, end = var_4483_end_0, end_mask = var_4483_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4483_cast_fp16")];
+            tensor<int32, [4]> var_4487_begin_0 = const()[name = tensor<string, []>("op_4487_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_4487_end_0 = const()[name = tensor<string, []>("op_4487_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_4487_end_mask_0 = const()[name = tensor<string, []>("op_4487_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4487_cast_fp16 = slice_by_index(begin = var_4487_begin_0, end = var_4487_end_0, end_mask = var_4487_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4487_cast_fp16")];
+            tensor<int32, [4]> var_4491_begin_0 = const()[name = tensor<string, []>("op_4491_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_4491_end_0 = const()[name = tensor<string, []>("op_4491_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_4491_end_mask_0 = const()[name = tensor<string, []>("op_4491_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4491_cast_fp16 = slice_by_index(begin = var_4491_begin_0, end = var_4491_end_0, end_mask = var_4491_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4491_cast_fp16")];
+            tensor<int32, [4]> var_4495_begin_0 = const()[name = tensor<string, []>("op_4495_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_4495_end_0 = const()[name = tensor<string, []>("op_4495_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_4495_end_mask_0 = const()[name = tensor<string, []>("op_4495_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4495_cast_fp16 = slice_by_index(begin = var_4495_begin_0, end = var_4495_end_0, end_mask = var_4495_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4495_cast_fp16")];
+            tensor<int32, [4]> var_4499_begin_0 = const()[name = tensor<string, []>("op_4499_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_4499_end_0 = const()[name = tensor<string, []>("op_4499_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_4499_end_mask_0 = const()[name = tensor<string, []>("op_4499_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4499_cast_fp16 = slice_by_index(begin = var_4499_begin_0, end = var_4499_end_0, end_mask = var_4499_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4499_cast_fp16")];
+            tensor<int32, [4]> var_4503_begin_0 = const()[name = tensor<string, []>("op_4503_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_4503_end_0 = const()[name = tensor<string, []>("op_4503_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_4503_end_mask_0 = const()[name = tensor<string, []>("op_4503_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4503_cast_fp16 = slice_by_index(begin = var_4503_begin_0, end = var_4503_end_0, end_mask = var_4503_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4503_cast_fp16")];
+            tensor<int32, [4]> var_4507_begin_0 = const()[name = tensor<string, []>("op_4507_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_4507_end_0 = const()[name = tensor<string, []>("op_4507_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_4507_end_mask_0 = const()[name = tensor<string, []>("op_4507_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4507_cast_fp16 = slice_by_index(begin = var_4507_begin_0, end = var_4507_end_0, end_mask = var_4507_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4507_cast_fp16")];
+            tensor<int32, [4]> var_4511_begin_0 = const()[name = tensor<string, []>("op_4511_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_4511_end_0 = const()[name = tensor<string, []>("op_4511_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_4511_end_mask_0 = const()[name = tensor<string, []>("op_4511_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4511_cast_fp16 = slice_by_index(begin = var_4511_begin_0, end = var_4511_end_0, end_mask = var_4511_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4511_cast_fp16")];
+            tensor<int32, [4]> var_4515_begin_0 = const()[name = tensor<string, []>("op_4515_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_4515_end_0 = const()[name = tensor<string, []>("op_4515_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_4515_end_mask_0 = const()[name = tensor<string, []>("op_4515_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4515_cast_fp16 = slice_by_index(begin = var_4515_begin_0, end = var_4515_end_0, end_mask = var_4515_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4515_cast_fp16")];
+            tensor<int32, [4]> var_4519_begin_0 = const()[name = tensor<string, []>("op_4519_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_4519_end_0 = const()[name = tensor<string, []>("op_4519_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_4519_end_mask_0 = const()[name = tensor<string, []>("op_4519_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_4519_cast_fp16 = slice_by_index(begin = var_4519_begin_0, end = var_4519_end_0, end_mask = var_4519_end_mask_0, x = value_9_cast_fp16)[name = tensor<string, []>("op_4519_cast_fp16")];
+            tensor<string, []> var_4523_equation_0 = const()[name = tensor<string, []>("op_4523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4523_cast_fp16 = einsum(equation = var_4523_equation_0, values = (var_4429_cast_fp16, var_4095_cast_fp16))[name = tensor<string, []>("op_4523_cast_fp16")];
+            tensor<fp16, []> var_4524_to_fp16 = const()[name = tensor<string, []>("op_4524_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_385_cast_fp16 = mul(x = var_4523_cast_fp16, y = var_4524_to_fp16)[name = tensor<string, []>("aw_chunk_385_cast_fp16")];
+            tensor<string, []> var_4527_equation_0 = const()[name = tensor<string, []>("op_4527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4527_cast_fp16 = einsum(equation = var_4527_equation_0, values = (var_4429_cast_fp16, var_4102_cast_fp16))[name = tensor<string, []>("op_4527_cast_fp16")];
+            tensor<fp16, []> var_4528_to_fp16 = const()[name = tensor<string, []>("op_4528_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_387_cast_fp16 = mul(x = var_4527_cast_fp16, y = var_4528_to_fp16)[name = tensor<string, []>("aw_chunk_387_cast_fp16")];
+            tensor<string, []> var_4531_equation_0 = const()[name = tensor<string, []>("op_4531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4531_cast_fp16 = einsum(equation = var_4531_equation_0, values = (var_4429_cast_fp16, var_4109_cast_fp16))[name = tensor<string, []>("op_4531_cast_fp16")];
+            tensor<fp16, []> var_4532_to_fp16 = const()[name = tensor<string, []>("op_4532_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_389_cast_fp16 = mul(x = var_4531_cast_fp16, y = var_4532_to_fp16)[name = tensor<string, []>("aw_chunk_389_cast_fp16")];
+            tensor<string, []> var_4535_equation_0 = const()[name = tensor<string, []>("op_4535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4535_cast_fp16 = einsum(equation = var_4535_equation_0, values = (var_4429_cast_fp16, var_4116_cast_fp16))[name = tensor<string, []>("op_4535_cast_fp16")];
+            tensor<fp16, []> var_4536_to_fp16 = const()[name = tensor<string, []>("op_4536_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_391_cast_fp16 = mul(x = var_4535_cast_fp16, y = var_4536_to_fp16)[name = tensor<string, []>("aw_chunk_391_cast_fp16")];
+            tensor<string, []> var_4539_equation_0 = const()[name = tensor<string, []>("op_4539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4539_cast_fp16 = einsum(equation = var_4539_equation_0, values = (var_4433_cast_fp16, var_4123_cast_fp16))[name = tensor<string, []>("op_4539_cast_fp16")];
+            tensor<fp16, []> var_4540_to_fp16 = const()[name = tensor<string, []>("op_4540_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_393_cast_fp16 = mul(x = var_4539_cast_fp16, y = var_4540_to_fp16)[name = tensor<string, []>("aw_chunk_393_cast_fp16")];
+            tensor<string, []> var_4543_equation_0 = const()[name = tensor<string, []>("op_4543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4543_cast_fp16 = einsum(equation = var_4543_equation_0, values = (var_4433_cast_fp16, var_4130_cast_fp16))[name = tensor<string, []>("op_4543_cast_fp16")];
+            tensor<fp16, []> var_4544_to_fp16 = const()[name = tensor<string, []>("op_4544_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_395_cast_fp16 = mul(x = var_4543_cast_fp16, y = var_4544_to_fp16)[name = tensor<string, []>("aw_chunk_395_cast_fp16")];
+            tensor<string, []> var_4547_equation_0 = const()[name = tensor<string, []>("op_4547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4547_cast_fp16 = einsum(equation = var_4547_equation_0, values = (var_4433_cast_fp16, var_4137_cast_fp16))[name = tensor<string, []>("op_4547_cast_fp16")];
+            tensor<fp16, []> var_4548_to_fp16 = const()[name = tensor<string, []>("op_4548_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_397_cast_fp16 = mul(x = var_4547_cast_fp16, y = var_4548_to_fp16)[name = tensor<string, []>("aw_chunk_397_cast_fp16")];
+            tensor<string, []> var_4551_equation_0 = const()[name = tensor<string, []>("op_4551_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4551_cast_fp16 = einsum(equation = var_4551_equation_0, values = (var_4433_cast_fp16, var_4144_cast_fp16))[name = tensor<string, []>("op_4551_cast_fp16")];
+            tensor<fp16, []> var_4552_to_fp16 = const()[name = tensor<string, []>("op_4552_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_399_cast_fp16 = mul(x = var_4551_cast_fp16, y = var_4552_to_fp16)[name = tensor<string, []>("aw_chunk_399_cast_fp16")];
+            tensor<string, []> var_4555_equation_0 = const()[name = tensor<string, []>("op_4555_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4555_cast_fp16 = einsum(equation = var_4555_equation_0, values = (var_4437_cast_fp16, var_4151_cast_fp16))[name = tensor<string, []>("op_4555_cast_fp16")];
+            tensor<fp16, []> var_4556_to_fp16 = const()[name = tensor<string, []>("op_4556_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_401_cast_fp16 = mul(x = var_4555_cast_fp16, y = var_4556_to_fp16)[name = tensor<string, []>("aw_chunk_401_cast_fp16")];
+            tensor<string, []> var_4559_equation_0 = const()[name = tensor<string, []>("op_4559_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4559_cast_fp16 = einsum(equation = var_4559_equation_0, values = (var_4437_cast_fp16, var_4158_cast_fp16))[name = tensor<string, []>("op_4559_cast_fp16")];
+            tensor<fp16, []> var_4560_to_fp16 = const()[name = tensor<string, []>("op_4560_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_403_cast_fp16 = mul(x = var_4559_cast_fp16, y = var_4560_to_fp16)[name = tensor<string, []>("aw_chunk_403_cast_fp16")];
+            tensor<string, []> var_4563_equation_0 = const()[name = tensor<string, []>("op_4563_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4563_cast_fp16 = einsum(equation = var_4563_equation_0, values = (var_4437_cast_fp16, var_4165_cast_fp16))[name = tensor<string, []>("op_4563_cast_fp16")];
+            tensor<fp16, []> var_4564_to_fp16 = const()[name = tensor<string, []>("op_4564_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_405_cast_fp16 = mul(x = var_4563_cast_fp16, y = var_4564_to_fp16)[name = tensor<string, []>("aw_chunk_405_cast_fp16")];
+            tensor<string, []> var_4567_equation_0 = const()[name = tensor<string, []>("op_4567_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4567_cast_fp16 = einsum(equation = var_4567_equation_0, values = (var_4437_cast_fp16, var_4172_cast_fp16))[name = tensor<string, []>("op_4567_cast_fp16")];
+            tensor<fp16, []> var_4568_to_fp16 = const()[name = tensor<string, []>("op_4568_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_407_cast_fp16 = mul(x = var_4567_cast_fp16, y = var_4568_to_fp16)[name = tensor<string, []>("aw_chunk_407_cast_fp16")];
+            tensor<string, []> var_4571_equation_0 = const()[name = tensor<string, []>("op_4571_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4571_cast_fp16 = einsum(equation = var_4571_equation_0, values = (var_4441_cast_fp16, var_4179_cast_fp16))[name = tensor<string, []>("op_4571_cast_fp16")];
+            tensor<fp16, []> var_4572_to_fp16 = const()[name = tensor<string, []>("op_4572_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_409_cast_fp16 = mul(x = var_4571_cast_fp16, y = var_4572_to_fp16)[name = tensor<string, []>("aw_chunk_409_cast_fp16")];
+            tensor<string, []> var_4575_equation_0 = const()[name = tensor<string, []>("op_4575_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4575_cast_fp16 = einsum(equation = var_4575_equation_0, values = (var_4441_cast_fp16, var_4186_cast_fp16))[name = tensor<string, []>("op_4575_cast_fp16")];
+            tensor<fp16, []> var_4576_to_fp16 = const()[name = tensor<string, []>("op_4576_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_411_cast_fp16 = mul(x = var_4575_cast_fp16, y = var_4576_to_fp16)[name = tensor<string, []>("aw_chunk_411_cast_fp16")];
+            tensor<string, []> var_4579_equation_0 = const()[name = tensor<string, []>("op_4579_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4579_cast_fp16 = einsum(equation = var_4579_equation_0, values = (var_4441_cast_fp16, var_4193_cast_fp16))[name = tensor<string, []>("op_4579_cast_fp16")];
+            tensor<fp16, []> var_4580_to_fp16 = const()[name = tensor<string, []>("op_4580_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_413_cast_fp16 = mul(x = var_4579_cast_fp16, y = var_4580_to_fp16)[name = tensor<string, []>("aw_chunk_413_cast_fp16")];
+            tensor<string, []> var_4583_equation_0 = const()[name = tensor<string, []>("op_4583_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4583_cast_fp16 = einsum(equation = var_4583_equation_0, values = (var_4441_cast_fp16, var_4200_cast_fp16))[name = tensor<string, []>("op_4583_cast_fp16")];
+            tensor<fp16, []> var_4584_to_fp16 = const()[name = tensor<string, []>("op_4584_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_415_cast_fp16 = mul(x = var_4583_cast_fp16, y = var_4584_to_fp16)[name = tensor<string, []>("aw_chunk_415_cast_fp16")];
+            tensor<string, []> var_4587_equation_0 = const()[name = tensor<string, []>("op_4587_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4587_cast_fp16 = einsum(equation = var_4587_equation_0, values = (var_4445_cast_fp16, var_4207_cast_fp16))[name = tensor<string, []>("op_4587_cast_fp16")];
+            tensor<fp16, []> var_4588_to_fp16 = const()[name = tensor<string, []>("op_4588_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_417_cast_fp16 = mul(x = var_4587_cast_fp16, y = var_4588_to_fp16)[name = tensor<string, []>("aw_chunk_417_cast_fp16")];
+            tensor<string, []> var_4591_equation_0 = const()[name = tensor<string, []>("op_4591_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4591_cast_fp16 = einsum(equation = var_4591_equation_0, values = (var_4445_cast_fp16, var_4214_cast_fp16))[name = tensor<string, []>("op_4591_cast_fp16")];
+            tensor<fp16, []> var_4592_to_fp16 = const()[name = tensor<string, []>("op_4592_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_419_cast_fp16 = mul(x = var_4591_cast_fp16, y = var_4592_to_fp16)[name = tensor<string, []>("aw_chunk_419_cast_fp16")];
+            tensor<string, []> var_4595_equation_0 = const()[name = tensor<string, []>("op_4595_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4595_cast_fp16 = einsum(equation = var_4595_equation_0, values = (var_4445_cast_fp16, var_4221_cast_fp16))[name = tensor<string, []>("op_4595_cast_fp16")];
+            tensor<fp16, []> var_4596_to_fp16 = const()[name = tensor<string, []>("op_4596_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_421_cast_fp16 = mul(x = var_4595_cast_fp16, y = var_4596_to_fp16)[name = tensor<string, []>("aw_chunk_421_cast_fp16")];
+            tensor<string, []> var_4599_equation_0 = const()[name = tensor<string, []>("op_4599_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4599_cast_fp16 = einsum(equation = var_4599_equation_0, values = (var_4445_cast_fp16, var_4228_cast_fp16))[name = tensor<string, []>("op_4599_cast_fp16")];
+            tensor<fp16, []> var_4600_to_fp16 = const()[name = tensor<string, []>("op_4600_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_423_cast_fp16 = mul(x = var_4599_cast_fp16, y = var_4600_to_fp16)[name = tensor<string, []>("aw_chunk_423_cast_fp16")];
+            tensor<string, []> var_4603_equation_0 = const()[name = tensor<string, []>("op_4603_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4603_cast_fp16 = einsum(equation = var_4603_equation_0, values = (var_4449_cast_fp16, var_4235_cast_fp16))[name = tensor<string, []>("op_4603_cast_fp16")];
+            tensor<fp16, []> var_4604_to_fp16 = const()[name = tensor<string, []>("op_4604_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_425_cast_fp16 = mul(x = var_4603_cast_fp16, y = var_4604_to_fp16)[name = tensor<string, []>("aw_chunk_425_cast_fp16")];
+            tensor<string, []> var_4607_equation_0 = const()[name = tensor<string, []>("op_4607_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4607_cast_fp16 = einsum(equation = var_4607_equation_0, values = (var_4449_cast_fp16, var_4242_cast_fp16))[name = tensor<string, []>("op_4607_cast_fp16")];
+            tensor<fp16, []> var_4608_to_fp16 = const()[name = tensor<string, []>("op_4608_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_427_cast_fp16 = mul(x = var_4607_cast_fp16, y = var_4608_to_fp16)[name = tensor<string, []>("aw_chunk_427_cast_fp16")];
+            tensor<string, []> var_4611_equation_0 = const()[name = tensor<string, []>("op_4611_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4611_cast_fp16 = einsum(equation = var_4611_equation_0, values = (var_4449_cast_fp16, var_4249_cast_fp16))[name = tensor<string, []>("op_4611_cast_fp16")];
+            tensor<fp16, []> var_4612_to_fp16 = const()[name = tensor<string, []>("op_4612_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_429_cast_fp16 = mul(x = var_4611_cast_fp16, y = var_4612_to_fp16)[name = tensor<string, []>("aw_chunk_429_cast_fp16")];
+            tensor<string, []> var_4615_equation_0 = const()[name = tensor<string, []>("op_4615_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4615_cast_fp16 = einsum(equation = var_4615_equation_0, values = (var_4449_cast_fp16, var_4256_cast_fp16))[name = tensor<string, []>("op_4615_cast_fp16")];
+            tensor<fp16, []> var_4616_to_fp16 = const()[name = tensor<string, []>("op_4616_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_431_cast_fp16 = mul(x = var_4615_cast_fp16, y = var_4616_to_fp16)[name = tensor<string, []>("aw_chunk_431_cast_fp16")];
+            tensor<string, []> var_4619_equation_0 = const()[name = tensor<string, []>("op_4619_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4619_cast_fp16 = einsum(equation = var_4619_equation_0, values = (var_4453_cast_fp16, var_4263_cast_fp16))[name = tensor<string, []>("op_4619_cast_fp16")];
+            tensor<fp16, []> var_4620_to_fp16 = const()[name = tensor<string, []>("op_4620_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_433_cast_fp16 = mul(x = var_4619_cast_fp16, y = var_4620_to_fp16)[name = tensor<string, []>("aw_chunk_433_cast_fp16")];
+            tensor<string, []> var_4623_equation_0 = const()[name = tensor<string, []>("op_4623_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4623_cast_fp16 = einsum(equation = var_4623_equation_0, values = (var_4453_cast_fp16, var_4270_cast_fp16))[name = tensor<string, []>("op_4623_cast_fp16")];
+            tensor<fp16, []> var_4624_to_fp16 = const()[name = tensor<string, []>("op_4624_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_435_cast_fp16 = mul(x = var_4623_cast_fp16, y = var_4624_to_fp16)[name = tensor<string, []>("aw_chunk_435_cast_fp16")];
+            tensor<string, []> var_4627_equation_0 = const()[name = tensor<string, []>("op_4627_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4627_cast_fp16 = einsum(equation = var_4627_equation_0, values = (var_4453_cast_fp16, var_4277_cast_fp16))[name = tensor<string, []>("op_4627_cast_fp16")];
+            tensor<fp16, []> var_4628_to_fp16 = const()[name = tensor<string, []>("op_4628_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_437_cast_fp16 = mul(x = var_4627_cast_fp16, y = var_4628_to_fp16)[name = tensor<string, []>("aw_chunk_437_cast_fp16")];
+            tensor<string, []> var_4631_equation_0 = const()[name = tensor<string, []>("op_4631_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4631_cast_fp16 = einsum(equation = var_4631_equation_0, values = (var_4453_cast_fp16, var_4284_cast_fp16))[name = tensor<string, []>("op_4631_cast_fp16")];
+            tensor<fp16, []> var_4632_to_fp16 = const()[name = tensor<string, []>("op_4632_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_439_cast_fp16 = mul(x = var_4631_cast_fp16, y = var_4632_to_fp16)[name = tensor<string, []>("aw_chunk_439_cast_fp16")];
+            tensor<string, []> var_4635_equation_0 = const()[name = tensor<string, []>("op_4635_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4635_cast_fp16 = einsum(equation = var_4635_equation_0, values = (var_4457_cast_fp16, var_4291_cast_fp16))[name = tensor<string, []>("op_4635_cast_fp16")];
+            tensor<fp16, []> var_4636_to_fp16 = const()[name = tensor<string, []>("op_4636_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_441_cast_fp16 = mul(x = var_4635_cast_fp16, y = var_4636_to_fp16)[name = tensor<string, []>("aw_chunk_441_cast_fp16")];
+            tensor<string, []> var_4639_equation_0 = const()[name = tensor<string, []>("op_4639_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4639_cast_fp16 = einsum(equation = var_4639_equation_0, values = (var_4457_cast_fp16, var_4298_cast_fp16))[name = tensor<string, []>("op_4639_cast_fp16")];
+            tensor<fp16, []> var_4640_to_fp16 = const()[name = tensor<string, []>("op_4640_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_443_cast_fp16 = mul(x = var_4639_cast_fp16, y = var_4640_to_fp16)[name = tensor<string, []>("aw_chunk_443_cast_fp16")];
+            tensor<string, []> var_4643_equation_0 = const()[name = tensor<string, []>("op_4643_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4643_cast_fp16 = einsum(equation = var_4643_equation_0, values = (var_4457_cast_fp16, var_4305_cast_fp16))[name = tensor<string, []>("op_4643_cast_fp16")];
+            tensor<fp16, []> var_4644_to_fp16 = const()[name = tensor<string, []>("op_4644_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_445_cast_fp16 = mul(x = var_4643_cast_fp16, y = var_4644_to_fp16)[name = tensor<string, []>("aw_chunk_445_cast_fp16")];
+            tensor<string, []> var_4647_equation_0 = const()[name = tensor<string, []>("op_4647_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4647_cast_fp16 = einsum(equation = var_4647_equation_0, values = (var_4457_cast_fp16, var_4312_cast_fp16))[name = tensor<string, []>("op_4647_cast_fp16")];
+            tensor<fp16, []> var_4648_to_fp16 = const()[name = tensor<string, []>("op_4648_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_447_cast_fp16 = mul(x = var_4647_cast_fp16, y = var_4648_to_fp16)[name = tensor<string, []>("aw_chunk_447_cast_fp16")];
+            tensor<string, []> var_4651_equation_0 = const()[name = tensor<string, []>("op_4651_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4651_cast_fp16 = einsum(equation = var_4651_equation_0, values = (var_4461_cast_fp16, var_4319_cast_fp16))[name = tensor<string, []>("op_4651_cast_fp16")];
+            tensor<fp16, []> var_4652_to_fp16 = const()[name = tensor<string, []>("op_4652_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_449_cast_fp16 = mul(x = var_4651_cast_fp16, y = var_4652_to_fp16)[name = tensor<string, []>("aw_chunk_449_cast_fp16")];
+            tensor<string, []> var_4655_equation_0 = const()[name = tensor<string, []>("op_4655_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4655_cast_fp16 = einsum(equation = var_4655_equation_0, values = (var_4461_cast_fp16, var_4326_cast_fp16))[name = tensor<string, []>("op_4655_cast_fp16")];
+            tensor<fp16, []> var_4656_to_fp16 = const()[name = tensor<string, []>("op_4656_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_451_cast_fp16 = mul(x = var_4655_cast_fp16, y = var_4656_to_fp16)[name = tensor<string, []>("aw_chunk_451_cast_fp16")];
+            tensor<string, []> var_4659_equation_0 = const()[name = tensor<string, []>("op_4659_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4659_cast_fp16 = einsum(equation = var_4659_equation_0, values = (var_4461_cast_fp16, var_4333_cast_fp16))[name = tensor<string, []>("op_4659_cast_fp16")];
+            tensor<fp16, []> var_4660_to_fp16 = const()[name = tensor<string, []>("op_4660_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_453_cast_fp16 = mul(x = var_4659_cast_fp16, y = var_4660_to_fp16)[name = tensor<string, []>("aw_chunk_453_cast_fp16")];
+            tensor<string, []> var_4663_equation_0 = const()[name = tensor<string, []>("op_4663_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4663_cast_fp16 = einsum(equation = var_4663_equation_0, values = (var_4461_cast_fp16, var_4340_cast_fp16))[name = tensor<string, []>("op_4663_cast_fp16")];
+            tensor<fp16, []> var_4664_to_fp16 = const()[name = tensor<string, []>("op_4664_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_455_cast_fp16 = mul(x = var_4663_cast_fp16, y = var_4664_to_fp16)[name = tensor<string, []>("aw_chunk_455_cast_fp16")];
+            tensor<string, []> var_4667_equation_0 = const()[name = tensor<string, []>("op_4667_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4667_cast_fp16 = einsum(equation = var_4667_equation_0, values = (var_4465_cast_fp16, var_4347_cast_fp16))[name = tensor<string, []>("op_4667_cast_fp16")];
+            tensor<fp16, []> var_4668_to_fp16 = const()[name = tensor<string, []>("op_4668_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_457_cast_fp16 = mul(x = var_4667_cast_fp16, y = var_4668_to_fp16)[name = tensor<string, []>("aw_chunk_457_cast_fp16")];
+            tensor<string, []> var_4671_equation_0 = const()[name = tensor<string, []>("op_4671_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4671_cast_fp16 = einsum(equation = var_4671_equation_0, values = (var_4465_cast_fp16, var_4354_cast_fp16))[name = tensor<string, []>("op_4671_cast_fp16")];
+            tensor<fp16, []> var_4672_to_fp16 = const()[name = tensor<string, []>("op_4672_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_459_cast_fp16 = mul(x = var_4671_cast_fp16, y = var_4672_to_fp16)[name = tensor<string, []>("aw_chunk_459_cast_fp16")];
+            tensor<string, []> var_4675_equation_0 = const()[name = tensor<string, []>("op_4675_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4675_cast_fp16 = einsum(equation = var_4675_equation_0, values = (var_4465_cast_fp16, var_4361_cast_fp16))[name = tensor<string, []>("op_4675_cast_fp16")];
+            tensor<fp16, []> var_4676_to_fp16 = const()[name = tensor<string, []>("op_4676_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_461_cast_fp16 = mul(x = var_4675_cast_fp16, y = var_4676_to_fp16)[name = tensor<string, []>("aw_chunk_461_cast_fp16")];
+            tensor<string, []> var_4679_equation_0 = const()[name = tensor<string, []>("op_4679_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4679_cast_fp16 = einsum(equation = var_4679_equation_0, values = (var_4465_cast_fp16, var_4368_cast_fp16))[name = tensor<string, []>("op_4679_cast_fp16")];
+            tensor<fp16, []> var_4680_to_fp16 = const()[name = tensor<string, []>("op_4680_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_463_cast_fp16 = mul(x = var_4679_cast_fp16, y = var_4680_to_fp16)[name = tensor<string, []>("aw_chunk_463_cast_fp16")];
+            tensor<string, []> var_4683_equation_0 = const()[name = tensor<string, []>("op_4683_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4683_cast_fp16 = einsum(equation = var_4683_equation_0, values = (var_4469_cast_fp16, var_4375_cast_fp16))[name = tensor<string, []>("op_4683_cast_fp16")];
+            tensor<fp16, []> var_4684_to_fp16 = const()[name = tensor<string, []>("op_4684_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_465_cast_fp16 = mul(x = var_4683_cast_fp16, y = var_4684_to_fp16)[name = tensor<string, []>("aw_chunk_465_cast_fp16")];
+            tensor<string, []> var_4687_equation_0 = const()[name = tensor<string, []>("op_4687_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4687_cast_fp16 = einsum(equation = var_4687_equation_0, values = (var_4469_cast_fp16, var_4382_cast_fp16))[name = tensor<string, []>("op_4687_cast_fp16")];
+            tensor<fp16, []> var_4688_to_fp16 = const()[name = tensor<string, []>("op_4688_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_467_cast_fp16 = mul(x = var_4687_cast_fp16, y = var_4688_to_fp16)[name = tensor<string, []>("aw_chunk_467_cast_fp16")];
+            tensor<string, []> var_4691_equation_0 = const()[name = tensor<string, []>("op_4691_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4691_cast_fp16 = einsum(equation = var_4691_equation_0, values = (var_4469_cast_fp16, var_4389_cast_fp16))[name = tensor<string, []>("op_4691_cast_fp16")];
+            tensor<fp16, []> var_4692_to_fp16 = const()[name = tensor<string, []>("op_4692_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_469_cast_fp16 = mul(x = var_4691_cast_fp16, y = var_4692_to_fp16)[name = tensor<string, []>("aw_chunk_469_cast_fp16")];
+            tensor<string, []> var_4695_equation_0 = const()[name = tensor<string, []>("op_4695_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4695_cast_fp16 = einsum(equation = var_4695_equation_0, values = (var_4469_cast_fp16, var_4396_cast_fp16))[name = tensor<string, []>("op_4695_cast_fp16")];
+            tensor<fp16, []> var_4696_to_fp16 = const()[name = tensor<string, []>("op_4696_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_471_cast_fp16 = mul(x = var_4695_cast_fp16, y = var_4696_to_fp16)[name = tensor<string, []>("aw_chunk_471_cast_fp16")];
+            tensor<string, []> var_4699_equation_0 = const()[name = tensor<string, []>("op_4699_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4699_cast_fp16 = einsum(equation = var_4699_equation_0, values = (var_4473_cast_fp16, var_4403_cast_fp16))[name = tensor<string, []>("op_4699_cast_fp16")];
+            tensor<fp16, []> var_4700_to_fp16 = const()[name = tensor<string, []>("op_4700_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_473_cast_fp16 = mul(x = var_4699_cast_fp16, y = var_4700_to_fp16)[name = tensor<string, []>("aw_chunk_473_cast_fp16")];
+            tensor<string, []> var_4703_equation_0 = const()[name = tensor<string, []>("op_4703_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4703_cast_fp16 = einsum(equation = var_4703_equation_0, values = (var_4473_cast_fp16, var_4410_cast_fp16))[name = tensor<string, []>("op_4703_cast_fp16")];
+            tensor<fp16, []> var_4704_to_fp16 = const()[name = tensor<string, []>("op_4704_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_475_cast_fp16 = mul(x = var_4703_cast_fp16, y = var_4704_to_fp16)[name = tensor<string, []>("aw_chunk_475_cast_fp16")];
+            tensor<string, []> var_4707_equation_0 = const()[name = tensor<string, []>("op_4707_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4707_cast_fp16 = einsum(equation = var_4707_equation_0, values = (var_4473_cast_fp16, var_4417_cast_fp16))[name = tensor<string, []>("op_4707_cast_fp16")];
+            tensor<fp16, []> var_4708_to_fp16 = const()[name = tensor<string, []>("op_4708_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_477_cast_fp16 = mul(x = var_4707_cast_fp16, y = var_4708_to_fp16)[name = tensor<string, []>("aw_chunk_477_cast_fp16")];
+            tensor<string, []> var_4711_equation_0 = const()[name = tensor<string, []>("op_4711_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4711_cast_fp16 = einsum(equation = var_4711_equation_0, values = (var_4473_cast_fp16, var_4424_cast_fp16))[name = tensor<string, []>("op_4711_cast_fp16")];
+            tensor<fp16, []> var_4712_to_fp16 = const()[name = tensor<string, []>("op_4712_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_479_cast_fp16 = mul(x = var_4711_cast_fp16, y = var_4712_to_fp16)[name = tensor<string, []>("aw_chunk_479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4714_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_385_cast_fp16)[name = tensor<string, []>("op_4714_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4715_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_387_cast_fp16)[name = tensor<string, []>("op_4715_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4716_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_389_cast_fp16)[name = tensor<string, []>("op_4716_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4717_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_391_cast_fp16)[name = tensor<string, []>("op_4717_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4718_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_393_cast_fp16)[name = tensor<string, []>("op_4718_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4719_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_395_cast_fp16)[name = tensor<string, []>("op_4719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4720_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_397_cast_fp16)[name = tensor<string, []>("op_4720_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4721_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_399_cast_fp16)[name = tensor<string, []>("op_4721_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4722_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_401_cast_fp16)[name = tensor<string, []>("op_4722_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4723_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_403_cast_fp16)[name = tensor<string, []>("op_4723_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4724_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_405_cast_fp16)[name = tensor<string, []>("op_4724_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4725_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_407_cast_fp16)[name = tensor<string, []>("op_4725_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4726_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_409_cast_fp16)[name = tensor<string, []>("op_4726_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4727_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_411_cast_fp16)[name = tensor<string, []>("op_4727_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4728_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_413_cast_fp16)[name = tensor<string, []>("op_4728_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4729_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_415_cast_fp16)[name = tensor<string, []>("op_4729_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4730_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_417_cast_fp16)[name = tensor<string, []>("op_4730_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4731_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_419_cast_fp16)[name = tensor<string, []>("op_4731_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4732_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_421_cast_fp16)[name = tensor<string, []>("op_4732_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4733_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_423_cast_fp16)[name = tensor<string, []>("op_4733_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4734_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_425_cast_fp16)[name = tensor<string, []>("op_4734_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4735_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_427_cast_fp16)[name = tensor<string, []>("op_4735_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4736_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_429_cast_fp16)[name = tensor<string, []>("op_4736_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4737_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_431_cast_fp16)[name = tensor<string, []>("op_4737_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4738_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_433_cast_fp16)[name = tensor<string, []>("op_4738_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4739_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_435_cast_fp16)[name = tensor<string, []>("op_4739_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4740_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_437_cast_fp16)[name = tensor<string, []>("op_4740_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4741_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_439_cast_fp16)[name = tensor<string, []>("op_4741_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4742_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_441_cast_fp16)[name = tensor<string, []>("op_4742_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4743_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_443_cast_fp16)[name = tensor<string, []>("op_4743_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4744_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_445_cast_fp16)[name = tensor<string, []>("op_4744_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4745_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_447_cast_fp16)[name = tensor<string, []>("op_4745_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4746_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_449_cast_fp16)[name = tensor<string, []>("op_4746_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4747_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_451_cast_fp16)[name = tensor<string, []>("op_4747_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4748_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_453_cast_fp16)[name = tensor<string, []>("op_4748_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4749_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_455_cast_fp16)[name = tensor<string, []>("op_4749_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4750_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_457_cast_fp16)[name = tensor<string, []>("op_4750_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4751_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_459_cast_fp16)[name = tensor<string, []>("op_4751_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4752_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_461_cast_fp16)[name = tensor<string, []>("op_4752_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4753_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_463_cast_fp16)[name = tensor<string, []>("op_4753_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4754_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_465_cast_fp16)[name = tensor<string, []>("op_4754_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4755_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_467_cast_fp16)[name = tensor<string, []>("op_4755_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4756_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_469_cast_fp16)[name = tensor<string, []>("op_4756_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4757_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_471_cast_fp16)[name = tensor<string, []>("op_4757_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4758_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_473_cast_fp16)[name = tensor<string, []>("op_4758_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4759_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_475_cast_fp16)[name = tensor<string, []>("op_4759_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4760_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_477_cast_fp16)[name = tensor<string, []>("op_4760_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_4761_cast_fp16 = softmax(axis = var_3987, x = aw_chunk_479_cast_fp16)[name = tensor<string, []>("op_4761_cast_fp16")];
+            tensor<string, []> var_4763_equation_0 = const()[name = tensor<string, []>("op_4763_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4763_cast_fp16 = einsum(equation = var_4763_equation_0, values = (var_4475_cast_fp16, var_4714_cast_fp16))[name = tensor<string, []>("op_4763_cast_fp16")];
+            tensor<string, []> var_4765_equation_0 = const()[name = tensor<string, []>("op_4765_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4765_cast_fp16 = einsum(equation = var_4765_equation_0, values = (var_4475_cast_fp16, var_4715_cast_fp16))[name = tensor<string, []>("op_4765_cast_fp16")];
+            tensor<string, []> var_4767_equation_0 = const()[name = tensor<string, []>("op_4767_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4767_cast_fp16 = einsum(equation = var_4767_equation_0, values = (var_4475_cast_fp16, var_4716_cast_fp16))[name = tensor<string, []>("op_4767_cast_fp16")];
+            tensor<string, []> var_4769_equation_0 = const()[name = tensor<string, []>("op_4769_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4769_cast_fp16 = einsum(equation = var_4769_equation_0, values = (var_4475_cast_fp16, var_4717_cast_fp16))[name = tensor<string, []>("op_4769_cast_fp16")];
+            tensor<string, []> var_4771_equation_0 = const()[name = tensor<string, []>("op_4771_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4771_cast_fp16 = einsum(equation = var_4771_equation_0, values = (var_4479_cast_fp16, var_4718_cast_fp16))[name = tensor<string, []>("op_4771_cast_fp16")];
+            tensor<string, []> var_4773_equation_0 = const()[name = tensor<string, []>("op_4773_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4773_cast_fp16 = einsum(equation = var_4773_equation_0, values = (var_4479_cast_fp16, var_4719_cast_fp16))[name = tensor<string, []>("op_4773_cast_fp16")];
+            tensor<string, []> var_4775_equation_0 = const()[name = tensor<string, []>("op_4775_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4775_cast_fp16 = einsum(equation = var_4775_equation_0, values = (var_4479_cast_fp16, var_4720_cast_fp16))[name = tensor<string, []>("op_4775_cast_fp16")];
+            tensor<string, []> var_4777_equation_0 = const()[name = tensor<string, []>("op_4777_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4777_cast_fp16 = einsum(equation = var_4777_equation_0, values = (var_4479_cast_fp16, var_4721_cast_fp16))[name = tensor<string, []>("op_4777_cast_fp16")];
+            tensor<string, []> var_4779_equation_0 = const()[name = tensor<string, []>("op_4779_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4779_cast_fp16 = einsum(equation = var_4779_equation_0, values = (var_4483_cast_fp16, var_4722_cast_fp16))[name = tensor<string, []>("op_4779_cast_fp16")];
+            tensor<string, []> var_4781_equation_0 = const()[name = tensor<string, []>("op_4781_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4781_cast_fp16 = einsum(equation = var_4781_equation_0, values = (var_4483_cast_fp16, var_4723_cast_fp16))[name = tensor<string, []>("op_4781_cast_fp16")];
+            tensor<string, []> var_4783_equation_0 = const()[name = tensor<string, []>("op_4783_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4783_cast_fp16 = einsum(equation = var_4783_equation_0, values = (var_4483_cast_fp16, var_4724_cast_fp16))[name = tensor<string, []>("op_4783_cast_fp16")];
+            tensor<string, []> var_4785_equation_0 = const()[name = tensor<string, []>("op_4785_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4785_cast_fp16 = einsum(equation = var_4785_equation_0, values = (var_4483_cast_fp16, var_4725_cast_fp16))[name = tensor<string, []>("op_4785_cast_fp16")];
+            tensor<string, []> var_4787_equation_0 = const()[name = tensor<string, []>("op_4787_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4787_cast_fp16 = einsum(equation = var_4787_equation_0, values = (var_4487_cast_fp16, var_4726_cast_fp16))[name = tensor<string, []>("op_4787_cast_fp16")];
+            tensor<string, []> var_4789_equation_0 = const()[name = tensor<string, []>("op_4789_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4789_cast_fp16 = einsum(equation = var_4789_equation_0, values = (var_4487_cast_fp16, var_4727_cast_fp16))[name = tensor<string, []>("op_4789_cast_fp16")];
+            tensor<string, []> var_4791_equation_0 = const()[name = tensor<string, []>("op_4791_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4791_cast_fp16 = einsum(equation = var_4791_equation_0, values = (var_4487_cast_fp16, var_4728_cast_fp16))[name = tensor<string, []>("op_4791_cast_fp16")];
+            tensor<string, []> var_4793_equation_0 = const()[name = tensor<string, []>("op_4793_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4793_cast_fp16 = einsum(equation = var_4793_equation_0, values = (var_4487_cast_fp16, var_4729_cast_fp16))[name = tensor<string, []>("op_4793_cast_fp16")];
+            tensor<string, []> var_4795_equation_0 = const()[name = tensor<string, []>("op_4795_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4795_cast_fp16 = einsum(equation = var_4795_equation_0, values = (var_4491_cast_fp16, var_4730_cast_fp16))[name = tensor<string, []>("op_4795_cast_fp16")];
+            tensor<string, []> var_4797_equation_0 = const()[name = tensor<string, []>("op_4797_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4797_cast_fp16 = einsum(equation = var_4797_equation_0, values = (var_4491_cast_fp16, var_4731_cast_fp16))[name = tensor<string, []>("op_4797_cast_fp16")];
+            tensor<string, []> var_4799_equation_0 = const()[name = tensor<string, []>("op_4799_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4799_cast_fp16 = einsum(equation = var_4799_equation_0, values = (var_4491_cast_fp16, var_4732_cast_fp16))[name = tensor<string, []>("op_4799_cast_fp16")];
+            tensor<string, []> var_4801_equation_0 = const()[name = tensor<string, []>("op_4801_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4801_cast_fp16 = einsum(equation = var_4801_equation_0, values = (var_4491_cast_fp16, var_4733_cast_fp16))[name = tensor<string, []>("op_4801_cast_fp16")];
+            tensor<string, []> var_4803_equation_0 = const()[name = tensor<string, []>("op_4803_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4803_cast_fp16 = einsum(equation = var_4803_equation_0, values = (var_4495_cast_fp16, var_4734_cast_fp16))[name = tensor<string, []>("op_4803_cast_fp16")];
+            tensor<string, []> var_4805_equation_0 = const()[name = tensor<string, []>("op_4805_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4805_cast_fp16 = einsum(equation = var_4805_equation_0, values = (var_4495_cast_fp16, var_4735_cast_fp16))[name = tensor<string, []>("op_4805_cast_fp16")];
+            tensor<string, []> var_4807_equation_0 = const()[name = tensor<string, []>("op_4807_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4807_cast_fp16 = einsum(equation = var_4807_equation_0, values = (var_4495_cast_fp16, var_4736_cast_fp16))[name = tensor<string, []>("op_4807_cast_fp16")];
+            tensor<string, []> var_4809_equation_0 = const()[name = tensor<string, []>("op_4809_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4809_cast_fp16 = einsum(equation = var_4809_equation_0, values = (var_4495_cast_fp16, var_4737_cast_fp16))[name = tensor<string, []>("op_4809_cast_fp16")];
+            tensor<string, []> var_4811_equation_0 = const()[name = tensor<string, []>("op_4811_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4811_cast_fp16 = einsum(equation = var_4811_equation_0, values = (var_4499_cast_fp16, var_4738_cast_fp16))[name = tensor<string, []>("op_4811_cast_fp16")];
+            tensor<string, []> var_4813_equation_0 = const()[name = tensor<string, []>("op_4813_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4813_cast_fp16 = einsum(equation = var_4813_equation_0, values = (var_4499_cast_fp16, var_4739_cast_fp16))[name = tensor<string, []>("op_4813_cast_fp16")];
+            tensor<string, []> var_4815_equation_0 = const()[name = tensor<string, []>("op_4815_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4815_cast_fp16 = einsum(equation = var_4815_equation_0, values = (var_4499_cast_fp16, var_4740_cast_fp16))[name = tensor<string, []>("op_4815_cast_fp16")];
+            tensor<string, []> var_4817_equation_0 = const()[name = tensor<string, []>("op_4817_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4817_cast_fp16 = einsum(equation = var_4817_equation_0, values = (var_4499_cast_fp16, var_4741_cast_fp16))[name = tensor<string, []>("op_4817_cast_fp16")];
+            tensor<string, []> var_4819_equation_0 = const()[name = tensor<string, []>("op_4819_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4819_cast_fp16 = einsum(equation = var_4819_equation_0, values = (var_4503_cast_fp16, var_4742_cast_fp16))[name = tensor<string, []>("op_4819_cast_fp16")];
+            tensor<string, []> var_4821_equation_0 = const()[name = tensor<string, []>("op_4821_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4821_cast_fp16 = einsum(equation = var_4821_equation_0, values = (var_4503_cast_fp16, var_4743_cast_fp16))[name = tensor<string, []>("op_4821_cast_fp16")];
+            tensor<string, []> var_4823_equation_0 = const()[name = tensor<string, []>("op_4823_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4823_cast_fp16 = einsum(equation = var_4823_equation_0, values = (var_4503_cast_fp16, var_4744_cast_fp16))[name = tensor<string, []>("op_4823_cast_fp16")];
+            tensor<string, []> var_4825_equation_0 = const()[name = tensor<string, []>("op_4825_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4825_cast_fp16 = einsum(equation = var_4825_equation_0, values = (var_4503_cast_fp16, var_4745_cast_fp16))[name = tensor<string, []>("op_4825_cast_fp16")];
+            tensor<string, []> var_4827_equation_0 = const()[name = tensor<string, []>("op_4827_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4827_cast_fp16 = einsum(equation = var_4827_equation_0, values = (var_4507_cast_fp16, var_4746_cast_fp16))[name = tensor<string, []>("op_4827_cast_fp16")];
+            tensor<string, []> var_4829_equation_0 = const()[name = tensor<string, []>("op_4829_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4829_cast_fp16 = einsum(equation = var_4829_equation_0, values = (var_4507_cast_fp16, var_4747_cast_fp16))[name = tensor<string, []>("op_4829_cast_fp16")];
+            tensor<string, []> var_4831_equation_0 = const()[name = tensor<string, []>("op_4831_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4831_cast_fp16 = einsum(equation = var_4831_equation_0, values = (var_4507_cast_fp16, var_4748_cast_fp16))[name = tensor<string, []>("op_4831_cast_fp16")];
+            tensor<string, []> var_4833_equation_0 = const()[name = tensor<string, []>("op_4833_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4833_cast_fp16 = einsum(equation = var_4833_equation_0, values = (var_4507_cast_fp16, var_4749_cast_fp16))[name = tensor<string, []>("op_4833_cast_fp16")];
+            tensor<string, []> var_4835_equation_0 = const()[name = tensor<string, []>("op_4835_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4835_cast_fp16 = einsum(equation = var_4835_equation_0, values = (var_4511_cast_fp16, var_4750_cast_fp16))[name = tensor<string, []>("op_4835_cast_fp16")];
+            tensor<string, []> var_4837_equation_0 = const()[name = tensor<string, []>("op_4837_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4837_cast_fp16 = einsum(equation = var_4837_equation_0, values = (var_4511_cast_fp16, var_4751_cast_fp16))[name = tensor<string, []>("op_4837_cast_fp16")];
+            tensor<string, []> var_4839_equation_0 = const()[name = tensor<string, []>("op_4839_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4839_cast_fp16 = einsum(equation = var_4839_equation_0, values = (var_4511_cast_fp16, var_4752_cast_fp16))[name = tensor<string, []>("op_4839_cast_fp16")];
+            tensor<string, []> var_4841_equation_0 = const()[name = tensor<string, []>("op_4841_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4841_cast_fp16 = einsum(equation = var_4841_equation_0, values = (var_4511_cast_fp16, var_4753_cast_fp16))[name = tensor<string, []>("op_4841_cast_fp16")];
+            tensor<string, []> var_4843_equation_0 = const()[name = tensor<string, []>("op_4843_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4843_cast_fp16 = einsum(equation = var_4843_equation_0, values = (var_4515_cast_fp16, var_4754_cast_fp16))[name = tensor<string, []>("op_4843_cast_fp16")];
+            tensor<string, []> var_4845_equation_0 = const()[name = tensor<string, []>("op_4845_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4845_cast_fp16 = einsum(equation = var_4845_equation_0, values = (var_4515_cast_fp16, var_4755_cast_fp16))[name = tensor<string, []>("op_4845_cast_fp16")];
+            tensor<string, []> var_4847_equation_0 = const()[name = tensor<string, []>("op_4847_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4847_cast_fp16 = einsum(equation = var_4847_equation_0, values = (var_4515_cast_fp16, var_4756_cast_fp16))[name = tensor<string, []>("op_4847_cast_fp16")];
+            tensor<string, []> var_4849_equation_0 = const()[name = tensor<string, []>("op_4849_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4849_cast_fp16 = einsum(equation = var_4849_equation_0, values = (var_4515_cast_fp16, var_4757_cast_fp16))[name = tensor<string, []>("op_4849_cast_fp16")];
+            tensor<string, []> var_4851_equation_0 = const()[name = tensor<string, []>("op_4851_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4851_cast_fp16 = einsum(equation = var_4851_equation_0, values = (var_4519_cast_fp16, var_4758_cast_fp16))[name = tensor<string, []>("op_4851_cast_fp16")];
+            tensor<string, []> var_4853_equation_0 = const()[name = tensor<string, []>("op_4853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4853_cast_fp16 = einsum(equation = var_4853_equation_0, values = (var_4519_cast_fp16, var_4759_cast_fp16))[name = tensor<string, []>("op_4853_cast_fp16")];
+            tensor<string, []> var_4855_equation_0 = const()[name = tensor<string, []>("op_4855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4855_cast_fp16 = einsum(equation = var_4855_equation_0, values = (var_4519_cast_fp16, var_4760_cast_fp16))[name = tensor<string, []>("op_4855_cast_fp16")];
+            tensor<string, []> var_4857_equation_0 = const()[name = tensor<string, []>("op_4857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_4857_cast_fp16 = einsum(equation = var_4857_equation_0, values = (var_4519_cast_fp16, var_4761_cast_fp16))[name = tensor<string, []>("op_4857_cast_fp16")];
+            tensor<bool, []> var_4859_interleave_0 = const()[name = tensor<string, []>("op_4859_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4859_cast_fp16 = concat(axis = var_3970, interleave = var_4859_interleave_0, values = (var_4763_cast_fp16, var_4765_cast_fp16, var_4767_cast_fp16, var_4769_cast_fp16))[name = tensor<string, []>("op_4859_cast_fp16")];
+            tensor<bool, []> var_4861_interleave_0 = const()[name = tensor<string, []>("op_4861_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4861_cast_fp16 = concat(axis = var_3970, interleave = var_4861_interleave_0, values = (var_4771_cast_fp16, var_4773_cast_fp16, var_4775_cast_fp16, var_4777_cast_fp16))[name = tensor<string, []>("op_4861_cast_fp16")];
+            tensor<bool, []> var_4863_interleave_0 = const()[name = tensor<string, []>("op_4863_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4863_cast_fp16 = concat(axis = var_3970, interleave = var_4863_interleave_0, values = (var_4779_cast_fp16, var_4781_cast_fp16, var_4783_cast_fp16, var_4785_cast_fp16))[name = tensor<string, []>("op_4863_cast_fp16")];
+            tensor<bool, []> var_4865_interleave_0 = const()[name = tensor<string, []>("op_4865_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4865_cast_fp16 = concat(axis = var_3970, interleave = var_4865_interleave_0, values = (var_4787_cast_fp16, var_4789_cast_fp16, var_4791_cast_fp16, var_4793_cast_fp16))[name = tensor<string, []>("op_4865_cast_fp16")];
+            tensor<bool, []> var_4867_interleave_0 = const()[name = tensor<string, []>("op_4867_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4867_cast_fp16 = concat(axis = var_3970, interleave = var_4867_interleave_0, values = (var_4795_cast_fp16, var_4797_cast_fp16, var_4799_cast_fp16, var_4801_cast_fp16))[name = tensor<string, []>("op_4867_cast_fp16")];
+            tensor<bool, []> var_4869_interleave_0 = const()[name = tensor<string, []>("op_4869_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4869_cast_fp16 = concat(axis = var_3970, interleave = var_4869_interleave_0, values = (var_4803_cast_fp16, var_4805_cast_fp16, var_4807_cast_fp16, var_4809_cast_fp16))[name = tensor<string, []>("op_4869_cast_fp16")];
+            tensor<bool, []> var_4871_interleave_0 = const()[name = tensor<string, []>("op_4871_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4871_cast_fp16 = concat(axis = var_3970, interleave = var_4871_interleave_0, values = (var_4811_cast_fp16, var_4813_cast_fp16, var_4815_cast_fp16, var_4817_cast_fp16))[name = tensor<string, []>("op_4871_cast_fp16")];
+            tensor<bool, []> var_4873_interleave_0 = const()[name = tensor<string, []>("op_4873_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4873_cast_fp16 = concat(axis = var_3970, interleave = var_4873_interleave_0, values = (var_4819_cast_fp16, var_4821_cast_fp16, var_4823_cast_fp16, var_4825_cast_fp16))[name = tensor<string, []>("op_4873_cast_fp16")];
+            tensor<bool, []> var_4875_interleave_0 = const()[name = tensor<string, []>("op_4875_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4875_cast_fp16 = concat(axis = var_3970, interleave = var_4875_interleave_0, values = (var_4827_cast_fp16, var_4829_cast_fp16, var_4831_cast_fp16, var_4833_cast_fp16))[name = tensor<string, []>("op_4875_cast_fp16")];
+            tensor<bool, []> var_4877_interleave_0 = const()[name = tensor<string, []>("op_4877_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4877_cast_fp16 = concat(axis = var_3970, interleave = var_4877_interleave_0, values = (var_4835_cast_fp16, var_4837_cast_fp16, var_4839_cast_fp16, var_4841_cast_fp16))[name = tensor<string, []>("op_4877_cast_fp16")];
+            tensor<bool, []> var_4879_interleave_0 = const()[name = tensor<string, []>("op_4879_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4879_cast_fp16 = concat(axis = var_3970, interleave = var_4879_interleave_0, values = (var_4843_cast_fp16, var_4845_cast_fp16, var_4847_cast_fp16, var_4849_cast_fp16))[name = tensor<string, []>("op_4879_cast_fp16")];
+            tensor<bool, []> var_4881_interleave_0 = const()[name = tensor<string, []>("op_4881_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4881_cast_fp16 = concat(axis = var_3970, interleave = var_4881_interleave_0, values = (var_4851_cast_fp16, var_4853_cast_fp16, var_4855_cast_fp16, var_4857_cast_fp16))[name = tensor<string, []>("op_4881_cast_fp16")];
+            tensor<bool, []> input_33_interleave_0 = const()[name = tensor<string, []>("input_33_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_33_cast_fp16 = concat(axis = var_3987, interleave = input_33_interleave_0, values = (var_4859_cast_fp16, var_4861_cast_fp16, var_4863_cast_fp16, var_4865_cast_fp16, var_4867_cast_fp16, var_4869_cast_fp16, var_4871_cast_fp16, var_4873_cast_fp16, var_4875_cast_fp16, var_4877_cast_fp16, var_4879_cast_fp16, var_4881_cast_fp16))[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<int32, [2]> var_4886 = const()[name = tensor<string, []>("op_4886"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4888 = const()[name = tensor<string, []>("op_4888"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_19_pad_type_0 = const()[name = tensor<string, []>("obj_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_19_pad_0 = const()[name = tensor<string, []>("obj_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66464448)))];
+            tensor<fp16, [768]> layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67644160)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_19_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = var_4888, groups = var_3987, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = var_4886, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("obj_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> var_4894 = const()[name = tensor<string, []>("op_4894"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_19_cast_fp16 = reduce_mean(axes = var_4894, keep_dims = var_3988, x = inputs_19_cast_fp16)[name = tensor<string, []>("channels_mean_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_19_cast_fp16 = sub(x = inputs_19_cast_fp16, y = channels_mean_19_cast_fp16)[name = tensor<string, []>("zero_mean_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = zero_mean_19_cast_fp16)[name = tensor<string, []>("zero_mean_sq_19_cast_fp16")];
+            tensor<int32, [1]> var_4898 = const()[name = tensor<string, []>("op_4898"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_4899_cast_fp16 = reduce_mean(axes = var_4898, keep_dims = var_3988, x = zero_mean_sq_19_cast_fp16)[name = tensor<string, []>("op_4899_cast_fp16")];
+            tensor<fp16, []> var_4900_to_fp16 = const()[name = tensor<string, []>("op_4900_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_4901_cast_fp16 = add(x = var_4899_cast_fp16, y = var_4900_to_fp16)[name = tensor<string, []>("op_4901_cast_fp16")];
+            tensor<fp16, []> denom_19_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_19_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0_to_fp16, x = var_4901_cast_fp16)[name = tensor<string, []>("denom_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = denom_19_cast_fp16)[name = tensor<string, []>("out_19_cast_fp16")];
+            tensor<fp16, [768]> input_35_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_35_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67645760)))];
+            tensor<fp16, [768]> input_35_beta_0_to_fp16 = const()[name = tensor<string, []>("input_35_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67647360)))];
+            tensor<fp16, []> input_35_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_35_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<int32, [2]> var_4912 = const()[name = tensor<string, []>("op_4912"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4914 = const()[name = tensor<string, []>("op_4914"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_37_pad_type_0 = const()[name = tensor<string, []>("input_37_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_37_pad_0 = const()[name = tensor<string, []>("input_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67648960)))];
+            tensor<fp16, [3072]> layers_4_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72367616)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_37_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = var_4914, groups = var_3987, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = var_4912, weight = layers_4_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_mode_0 = const()[name = tensor<string, []>("input_39_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<int32, [2]> var_4920 = const()[name = tensor<string, []>("op_4920"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4922 = const()[name = tensor<string, []>("op_4922"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_13_pad_type_0 = const()[name = tensor<string, []>("hidden_states_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_13_pad_0 = const()[name = tensor<string, []>("hidden_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72373824)))];
+            tensor<fp16, [768]> layers_4_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77092480)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_13_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = var_4922, groups = var_3987, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = var_4920, weight = layers_4_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor<string, []>("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_4929 = const()[name = tensor<string, []>("op_4929"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_4946 = const()[name = tensor<string, []>("op_4946"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_4947 = const()[name = tensor<string, []>("op_4947"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_4957 = const()[name = tensor<string, []>("op_4957"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_21_cast_fp16 = reduce_mean(axes = var_4957, keep_dims = var_4947, x = inputs_21_cast_fp16)[name = tensor<string, []>("channels_mean_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_21_cast_fp16 = sub(x = inputs_21_cast_fp16, y = channels_mean_21_cast_fp16)[name = tensor<string, []>("zero_mean_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = zero_mean_21_cast_fp16)[name = tensor<string, []>("zero_mean_sq_21_cast_fp16")];
+            tensor<int32, [1]> var_4961 = const()[name = tensor<string, []>("op_4961"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_4962_cast_fp16 = reduce_mean(axes = var_4961, keep_dims = var_4947, x = zero_mean_sq_21_cast_fp16)[name = tensor<string, []>("op_4962_cast_fp16")];
+            tensor<fp16, []> var_4963_to_fp16 = const()[name = tensor<string, []>("op_4963_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_4964_cast_fp16 = add(x = var_4962_cast_fp16, y = var_4963_to_fp16)[name = tensor<string, []>("op_4964_cast_fp16")];
+            tensor<fp16, []> denom_21_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_21_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0_to_fp16, x = var_4964_cast_fp16)[name = tensor<string, []>("denom_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = denom_21_cast_fp16)[name = tensor<string, []>("out_21_cast_fp16")];
+            tensor<fp16, [768]> obj_21_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_21_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77094080)))];
+            tensor<fp16, [768]> obj_21_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_21_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77095680)))];
+            tensor<fp16, []> obj_21_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_21_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
+            tensor<int32, [2]> var_4979 = const()[name = tensor<string, []>("op_4979"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4981 = const()[name = tensor<string, []>("op_4981"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_11_pad_type_0 = const()[name = tensor<string, []>("query_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_11_pad_0 = const()[name = tensor<string, []>("query_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77097280)))];
+            tensor<fp16, [768]> layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78276992)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_11_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = var_4981, groups = var_4946, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = var_4979, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor<string, []>("query_11_cast_fp16")];
+            tensor<int32, [2]> var_4985 = const()[name = tensor<string, []>("op_4985"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4987 = const()[name = tensor<string, []>("op_4987"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_11_pad_type_0 = const()[name = tensor<string, []>("key_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_11_pad_0 = const()[name = tensor<string, []>("key_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78278592)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_11_cast_fp16 = conv(dilations = var_4987, groups = var_4946, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = var_4985, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor<string, []>("key_11_cast_fp16")];
+            tensor<int32, [2]> var_4992 = const()[name = tensor<string, []>("op_4992"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_4994 = const()[name = tensor<string, []>("op_4994"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_11_pad_type_0 = const()[name = tensor<string, []>("value_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_11_pad_0 = const()[name = tensor<string, []>("value_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79458304)))];
+            tensor<fp16, [768]> layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80638016)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_11_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = var_4994, groups = var_4946, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = var_4992, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor<string, []>("value_11_cast_fp16")];
+            tensor<int32, [4]> var_5001_begin_0 = const()[name = tensor<string, []>("op_5001_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5001_end_0 = const()[name = tensor<string, []>("op_5001_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5001_end_mask_0 = const()[name = tensor<string, []>("op_5001_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5001_cast_fp16 = slice_by_index(begin = var_5001_begin_0, end = var_5001_end_0, end_mask = var_5001_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5001_cast_fp16")];
+            tensor<int32, [4]> var_5005_begin_0 = const()[name = tensor<string, []>("op_5005_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_5005_end_0 = const()[name = tensor<string, []>("op_5005_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_5005_end_mask_0 = const()[name = tensor<string, []>("op_5005_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5005_cast_fp16 = slice_by_index(begin = var_5005_begin_0, end = var_5005_end_0, end_mask = var_5005_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5005_cast_fp16")];
+            tensor<int32, [4]> var_5009_begin_0 = const()[name = tensor<string, []>("op_5009_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_5009_end_0 = const()[name = tensor<string, []>("op_5009_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_5009_end_mask_0 = const()[name = tensor<string, []>("op_5009_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5009_cast_fp16 = slice_by_index(begin = var_5009_begin_0, end = var_5009_end_0, end_mask = var_5009_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5009_cast_fp16")];
+            tensor<int32, [4]> var_5013_begin_0 = const()[name = tensor<string, []>("op_5013_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_5013_end_0 = const()[name = tensor<string, []>("op_5013_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_5013_end_mask_0 = const()[name = tensor<string, []>("op_5013_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5013_cast_fp16 = slice_by_index(begin = var_5013_begin_0, end = var_5013_end_0, end_mask = var_5013_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5013_cast_fp16")];
+            tensor<int32, [4]> var_5017_begin_0 = const()[name = tensor<string, []>("op_5017_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_5017_end_0 = const()[name = tensor<string, []>("op_5017_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_5017_end_mask_0 = const()[name = tensor<string, []>("op_5017_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5017_cast_fp16 = slice_by_index(begin = var_5017_begin_0, end = var_5017_end_0, end_mask = var_5017_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5017_cast_fp16")];
+            tensor<int32, [4]> var_5021_begin_0 = const()[name = tensor<string, []>("op_5021_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_5021_end_0 = const()[name = tensor<string, []>("op_5021_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_5021_end_mask_0 = const()[name = tensor<string, []>("op_5021_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5021_cast_fp16 = slice_by_index(begin = var_5021_begin_0, end = var_5021_end_0, end_mask = var_5021_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5021_cast_fp16")];
+            tensor<int32, [4]> var_5025_begin_0 = const()[name = tensor<string, []>("op_5025_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_5025_end_0 = const()[name = tensor<string, []>("op_5025_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_5025_end_mask_0 = const()[name = tensor<string, []>("op_5025_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5025_cast_fp16 = slice_by_index(begin = var_5025_begin_0, end = var_5025_end_0, end_mask = var_5025_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5025_cast_fp16")];
+            tensor<int32, [4]> var_5029_begin_0 = const()[name = tensor<string, []>("op_5029_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_5029_end_0 = const()[name = tensor<string, []>("op_5029_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_5029_end_mask_0 = const()[name = tensor<string, []>("op_5029_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5029_cast_fp16 = slice_by_index(begin = var_5029_begin_0, end = var_5029_end_0, end_mask = var_5029_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5029_cast_fp16")];
+            tensor<int32, [4]> var_5033_begin_0 = const()[name = tensor<string, []>("op_5033_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_5033_end_0 = const()[name = tensor<string, []>("op_5033_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_5033_end_mask_0 = const()[name = tensor<string, []>("op_5033_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5033_cast_fp16 = slice_by_index(begin = var_5033_begin_0, end = var_5033_end_0, end_mask = var_5033_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5033_cast_fp16")];
+            tensor<int32, [4]> var_5037_begin_0 = const()[name = tensor<string, []>("op_5037_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_5037_end_0 = const()[name = tensor<string, []>("op_5037_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_5037_end_mask_0 = const()[name = tensor<string, []>("op_5037_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5037_cast_fp16 = slice_by_index(begin = var_5037_begin_0, end = var_5037_end_0, end_mask = var_5037_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5037_cast_fp16")];
+            tensor<int32, [4]> var_5041_begin_0 = const()[name = tensor<string, []>("op_5041_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_5041_end_0 = const()[name = tensor<string, []>("op_5041_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_5041_end_mask_0 = const()[name = tensor<string, []>("op_5041_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5041_cast_fp16 = slice_by_index(begin = var_5041_begin_0, end = var_5041_end_0, end_mask = var_5041_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5041_cast_fp16")];
+            tensor<int32, [4]> var_5045_begin_0 = const()[name = tensor<string, []>("op_5045_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_5045_end_0 = const()[name = tensor<string, []>("op_5045_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_5045_end_mask_0 = const()[name = tensor<string, []>("op_5045_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5045_cast_fp16 = slice_by_index(begin = var_5045_begin_0, end = var_5045_end_0, end_mask = var_5045_end_mask_0, x = query_11_cast_fp16)[name = tensor<string, []>("op_5045_cast_fp16")];
+            tensor<int32, [4]> var_5054_begin_0 = const()[name = tensor<string, []>("op_5054_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5054_end_0 = const()[name = tensor<string, []>("op_5054_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5054_end_mask_0 = const()[name = tensor<string, []>("op_5054_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5054_cast_fp16 = slice_by_index(begin = var_5054_begin_0, end = var_5054_end_0, end_mask = var_5054_end_mask_0, x = var_5001_cast_fp16)[name = tensor<string, []>("op_5054_cast_fp16")];
+            tensor<int32, [4]> var_5061_begin_0 = const()[name = tensor<string, []>("op_5061_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5061_end_0 = const()[name = tensor<string, []>("op_5061_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5061_end_mask_0 = const()[name = tensor<string, []>("op_5061_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5061_cast_fp16 = slice_by_index(begin = var_5061_begin_0, end = var_5061_end_0, end_mask = var_5061_end_mask_0, x = var_5001_cast_fp16)[name = tensor<string, []>("op_5061_cast_fp16")];
+            tensor<int32, [4]> var_5068_begin_0 = const()[name = tensor<string, []>("op_5068_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5068_end_0 = const()[name = tensor<string, []>("op_5068_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5068_end_mask_0 = const()[name = tensor<string, []>("op_5068_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5068_cast_fp16 = slice_by_index(begin = var_5068_begin_0, end = var_5068_end_0, end_mask = var_5068_end_mask_0, x = var_5001_cast_fp16)[name = tensor<string, []>("op_5068_cast_fp16")];
+            tensor<int32, [4]> var_5075_begin_0 = const()[name = tensor<string, []>("op_5075_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5075_end_0 = const()[name = tensor<string, []>("op_5075_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5075_end_mask_0 = const()[name = tensor<string, []>("op_5075_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5075_cast_fp16 = slice_by_index(begin = var_5075_begin_0, end = var_5075_end_0, end_mask = var_5075_end_mask_0, x = var_5001_cast_fp16)[name = tensor<string, []>("op_5075_cast_fp16")];
+            tensor<int32, [4]> var_5082_begin_0 = const()[name = tensor<string, []>("op_5082_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5082_end_0 = const()[name = tensor<string, []>("op_5082_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5082_end_mask_0 = const()[name = tensor<string, []>("op_5082_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5082_cast_fp16 = slice_by_index(begin = var_5082_begin_0, end = var_5082_end_0, end_mask = var_5082_end_mask_0, x = var_5005_cast_fp16)[name = tensor<string, []>("op_5082_cast_fp16")];
+            tensor<int32, [4]> var_5089_begin_0 = const()[name = tensor<string, []>("op_5089_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5089_end_0 = const()[name = tensor<string, []>("op_5089_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5089_end_mask_0 = const()[name = tensor<string, []>("op_5089_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = var_5089_end_0, end_mask = var_5089_end_mask_0, x = var_5005_cast_fp16)[name = tensor<string, []>("op_5089_cast_fp16")];
+            tensor<int32, [4]> var_5096_begin_0 = const()[name = tensor<string, []>("op_5096_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5096_end_0 = const()[name = tensor<string, []>("op_5096_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5096_end_mask_0 = const()[name = tensor<string, []>("op_5096_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5096_cast_fp16 = slice_by_index(begin = var_5096_begin_0, end = var_5096_end_0, end_mask = var_5096_end_mask_0, x = var_5005_cast_fp16)[name = tensor<string, []>("op_5096_cast_fp16")];
+            tensor<int32, [4]> var_5103_begin_0 = const()[name = tensor<string, []>("op_5103_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5103_end_0 = const()[name = tensor<string, []>("op_5103_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5103_end_mask_0 = const()[name = tensor<string, []>("op_5103_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5103_cast_fp16 = slice_by_index(begin = var_5103_begin_0, end = var_5103_end_0, end_mask = var_5103_end_mask_0, x = var_5005_cast_fp16)[name = tensor<string, []>("op_5103_cast_fp16")];
+            tensor<int32, [4]> var_5110_begin_0 = const()[name = tensor<string, []>("op_5110_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5110_end_0 = const()[name = tensor<string, []>("op_5110_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5110_end_mask_0 = const()[name = tensor<string, []>("op_5110_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5110_cast_fp16 = slice_by_index(begin = var_5110_begin_0, end = var_5110_end_0, end_mask = var_5110_end_mask_0, x = var_5009_cast_fp16)[name = tensor<string, []>("op_5110_cast_fp16")];
+            tensor<int32, [4]> var_5117_begin_0 = const()[name = tensor<string, []>("op_5117_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5117_end_0 = const()[name = tensor<string, []>("op_5117_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5117_end_mask_0 = const()[name = tensor<string, []>("op_5117_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5117_cast_fp16 = slice_by_index(begin = var_5117_begin_0, end = var_5117_end_0, end_mask = var_5117_end_mask_0, x = var_5009_cast_fp16)[name = tensor<string, []>("op_5117_cast_fp16")];
+            tensor<int32, [4]> var_5124_begin_0 = const()[name = tensor<string, []>("op_5124_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5124_end_0 = const()[name = tensor<string, []>("op_5124_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5124_end_mask_0 = const()[name = tensor<string, []>("op_5124_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5124_cast_fp16 = slice_by_index(begin = var_5124_begin_0, end = var_5124_end_0, end_mask = var_5124_end_mask_0, x = var_5009_cast_fp16)[name = tensor<string, []>("op_5124_cast_fp16")];
+            tensor<int32, [4]> var_5131_begin_0 = const()[name = tensor<string, []>("op_5131_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5131_end_0 = const()[name = tensor<string, []>("op_5131_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5131_end_mask_0 = const()[name = tensor<string, []>("op_5131_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5131_cast_fp16 = slice_by_index(begin = var_5131_begin_0, end = var_5131_end_0, end_mask = var_5131_end_mask_0, x = var_5009_cast_fp16)[name = tensor<string, []>("op_5131_cast_fp16")];
+            tensor<int32, [4]> var_5138_begin_0 = const()[name = tensor<string, []>("op_5138_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5138_end_0 = const()[name = tensor<string, []>("op_5138_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5138_end_mask_0 = const()[name = tensor<string, []>("op_5138_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5138_cast_fp16 = slice_by_index(begin = var_5138_begin_0, end = var_5138_end_0, end_mask = var_5138_end_mask_0, x = var_5013_cast_fp16)[name = tensor<string, []>("op_5138_cast_fp16")];
+            tensor<int32, [4]> var_5145_begin_0 = const()[name = tensor<string, []>("op_5145_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5145_end_0 = const()[name = tensor<string, []>("op_5145_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5145_end_mask_0 = const()[name = tensor<string, []>("op_5145_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5145_cast_fp16 = slice_by_index(begin = var_5145_begin_0, end = var_5145_end_0, end_mask = var_5145_end_mask_0, x = var_5013_cast_fp16)[name = tensor<string, []>("op_5145_cast_fp16")];
+            tensor<int32, [4]> var_5152_begin_0 = const()[name = tensor<string, []>("op_5152_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5152_end_0 = const()[name = tensor<string, []>("op_5152_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5152_end_mask_0 = const()[name = tensor<string, []>("op_5152_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5152_cast_fp16 = slice_by_index(begin = var_5152_begin_0, end = var_5152_end_0, end_mask = var_5152_end_mask_0, x = var_5013_cast_fp16)[name = tensor<string, []>("op_5152_cast_fp16")];
+            tensor<int32, [4]> var_5159_begin_0 = const()[name = tensor<string, []>("op_5159_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5159_end_0 = const()[name = tensor<string, []>("op_5159_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5159_end_mask_0 = const()[name = tensor<string, []>("op_5159_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5159_cast_fp16 = slice_by_index(begin = var_5159_begin_0, end = var_5159_end_0, end_mask = var_5159_end_mask_0, x = var_5013_cast_fp16)[name = tensor<string, []>("op_5159_cast_fp16")];
+            tensor<int32, [4]> var_5166_begin_0 = const()[name = tensor<string, []>("op_5166_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5166_end_0 = const()[name = tensor<string, []>("op_5166_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5166_end_mask_0 = const()[name = tensor<string, []>("op_5166_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5166_cast_fp16 = slice_by_index(begin = var_5166_begin_0, end = var_5166_end_0, end_mask = var_5166_end_mask_0, x = var_5017_cast_fp16)[name = tensor<string, []>("op_5166_cast_fp16")];
+            tensor<int32, [4]> var_5173_begin_0 = const()[name = tensor<string, []>("op_5173_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5173_end_0 = const()[name = tensor<string, []>("op_5173_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5173_end_mask_0 = const()[name = tensor<string, []>("op_5173_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5173_cast_fp16 = slice_by_index(begin = var_5173_begin_0, end = var_5173_end_0, end_mask = var_5173_end_mask_0, x = var_5017_cast_fp16)[name = tensor<string, []>("op_5173_cast_fp16")];
+            tensor<int32, [4]> var_5180_begin_0 = const()[name = tensor<string, []>("op_5180_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5180_end_0 = const()[name = tensor<string, []>("op_5180_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5180_end_mask_0 = const()[name = tensor<string, []>("op_5180_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5180_cast_fp16 = slice_by_index(begin = var_5180_begin_0, end = var_5180_end_0, end_mask = var_5180_end_mask_0, x = var_5017_cast_fp16)[name = tensor<string, []>("op_5180_cast_fp16")];
+            tensor<int32, [4]> var_5187_begin_0 = const()[name = tensor<string, []>("op_5187_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5187_end_0 = const()[name = tensor<string, []>("op_5187_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5187_end_mask_0 = const()[name = tensor<string, []>("op_5187_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5187_cast_fp16 = slice_by_index(begin = var_5187_begin_0, end = var_5187_end_0, end_mask = var_5187_end_mask_0, x = var_5017_cast_fp16)[name = tensor<string, []>("op_5187_cast_fp16")];
+            tensor<int32, [4]> var_5194_begin_0 = const()[name = tensor<string, []>("op_5194_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5194_end_0 = const()[name = tensor<string, []>("op_5194_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5194_end_mask_0 = const()[name = tensor<string, []>("op_5194_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5194_cast_fp16 = slice_by_index(begin = var_5194_begin_0, end = var_5194_end_0, end_mask = var_5194_end_mask_0, x = var_5021_cast_fp16)[name = tensor<string, []>("op_5194_cast_fp16")];
+            tensor<int32, [4]> var_5201_begin_0 = const()[name = tensor<string, []>("op_5201_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5201_end_0 = const()[name = tensor<string, []>("op_5201_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5201_end_mask_0 = const()[name = tensor<string, []>("op_5201_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5201_cast_fp16 = slice_by_index(begin = var_5201_begin_0, end = var_5201_end_0, end_mask = var_5201_end_mask_0, x = var_5021_cast_fp16)[name = tensor<string, []>("op_5201_cast_fp16")];
+            tensor<int32, [4]> var_5208_begin_0 = const()[name = tensor<string, []>("op_5208_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5208_end_0 = const()[name = tensor<string, []>("op_5208_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5208_end_mask_0 = const()[name = tensor<string, []>("op_5208_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5208_cast_fp16 = slice_by_index(begin = var_5208_begin_0, end = var_5208_end_0, end_mask = var_5208_end_mask_0, x = var_5021_cast_fp16)[name = tensor<string, []>("op_5208_cast_fp16")];
+            tensor<int32, [4]> var_5215_begin_0 = const()[name = tensor<string, []>("op_5215_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5215_end_0 = const()[name = tensor<string, []>("op_5215_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5215_end_mask_0 = const()[name = tensor<string, []>("op_5215_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5215_cast_fp16 = slice_by_index(begin = var_5215_begin_0, end = var_5215_end_0, end_mask = var_5215_end_mask_0, x = var_5021_cast_fp16)[name = tensor<string, []>("op_5215_cast_fp16")];
+            tensor<int32, [4]> var_5222_begin_0 = const()[name = tensor<string, []>("op_5222_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5222_end_0 = const()[name = tensor<string, []>("op_5222_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5222_end_mask_0 = const()[name = tensor<string, []>("op_5222_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5222_cast_fp16 = slice_by_index(begin = var_5222_begin_0, end = var_5222_end_0, end_mask = var_5222_end_mask_0, x = var_5025_cast_fp16)[name = tensor<string, []>("op_5222_cast_fp16")];
+            tensor<int32, [4]> var_5229_begin_0 = const()[name = tensor<string, []>("op_5229_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5229_end_0 = const()[name = tensor<string, []>("op_5229_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5229_end_mask_0 = const()[name = tensor<string, []>("op_5229_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5229_cast_fp16 = slice_by_index(begin = var_5229_begin_0, end = var_5229_end_0, end_mask = var_5229_end_mask_0, x = var_5025_cast_fp16)[name = tensor<string, []>("op_5229_cast_fp16")];
+            tensor<int32, [4]> var_5236_begin_0 = const()[name = tensor<string, []>("op_5236_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5236_end_0 = const()[name = tensor<string, []>("op_5236_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5236_end_mask_0 = const()[name = tensor<string, []>("op_5236_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5236_cast_fp16 = slice_by_index(begin = var_5236_begin_0, end = var_5236_end_0, end_mask = var_5236_end_mask_0, x = var_5025_cast_fp16)[name = tensor<string, []>("op_5236_cast_fp16")];
+            tensor<int32, [4]> var_5243_begin_0 = const()[name = tensor<string, []>("op_5243_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5243_end_0 = const()[name = tensor<string, []>("op_5243_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5243_end_mask_0 = const()[name = tensor<string, []>("op_5243_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5243_cast_fp16 = slice_by_index(begin = var_5243_begin_0, end = var_5243_end_0, end_mask = var_5243_end_mask_0, x = var_5025_cast_fp16)[name = tensor<string, []>("op_5243_cast_fp16")];
+            tensor<int32, [4]> var_5250_begin_0 = const()[name = tensor<string, []>("op_5250_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5250_end_0 = const()[name = tensor<string, []>("op_5250_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5250_end_mask_0 = const()[name = tensor<string, []>("op_5250_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5250_cast_fp16 = slice_by_index(begin = var_5250_begin_0, end = var_5250_end_0, end_mask = var_5250_end_mask_0, x = var_5029_cast_fp16)[name = tensor<string, []>("op_5250_cast_fp16")];
+            tensor<int32, [4]> var_5257_begin_0 = const()[name = tensor<string, []>("op_5257_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5257_end_0 = const()[name = tensor<string, []>("op_5257_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5257_end_mask_0 = const()[name = tensor<string, []>("op_5257_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5257_cast_fp16 = slice_by_index(begin = var_5257_begin_0, end = var_5257_end_0, end_mask = var_5257_end_mask_0, x = var_5029_cast_fp16)[name = tensor<string, []>("op_5257_cast_fp16")];
+            tensor<int32, [4]> var_5264_begin_0 = const()[name = tensor<string, []>("op_5264_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5264_end_0 = const()[name = tensor<string, []>("op_5264_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5264_end_mask_0 = const()[name = tensor<string, []>("op_5264_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5264_cast_fp16 = slice_by_index(begin = var_5264_begin_0, end = var_5264_end_0, end_mask = var_5264_end_mask_0, x = var_5029_cast_fp16)[name = tensor<string, []>("op_5264_cast_fp16")];
+            tensor<int32, [4]> var_5271_begin_0 = const()[name = tensor<string, []>("op_5271_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5271_end_0 = const()[name = tensor<string, []>("op_5271_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5271_end_mask_0 = const()[name = tensor<string, []>("op_5271_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5271_cast_fp16 = slice_by_index(begin = var_5271_begin_0, end = var_5271_end_0, end_mask = var_5271_end_mask_0, x = var_5029_cast_fp16)[name = tensor<string, []>("op_5271_cast_fp16")];
+            tensor<int32, [4]> var_5278_begin_0 = const()[name = tensor<string, []>("op_5278_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5278_end_0 = const()[name = tensor<string, []>("op_5278_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5278_end_mask_0 = const()[name = tensor<string, []>("op_5278_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5278_cast_fp16 = slice_by_index(begin = var_5278_begin_0, end = var_5278_end_0, end_mask = var_5278_end_mask_0, x = var_5033_cast_fp16)[name = tensor<string, []>("op_5278_cast_fp16")];
+            tensor<int32, [4]> var_5285_begin_0 = const()[name = tensor<string, []>("op_5285_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5285_end_0 = const()[name = tensor<string, []>("op_5285_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5285_end_mask_0 = const()[name = tensor<string, []>("op_5285_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5285_cast_fp16 = slice_by_index(begin = var_5285_begin_0, end = var_5285_end_0, end_mask = var_5285_end_mask_0, x = var_5033_cast_fp16)[name = tensor<string, []>("op_5285_cast_fp16")];
+            tensor<int32, [4]> var_5292_begin_0 = const()[name = tensor<string, []>("op_5292_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5292_end_0 = const()[name = tensor<string, []>("op_5292_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5292_end_mask_0 = const()[name = tensor<string, []>("op_5292_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5292_cast_fp16 = slice_by_index(begin = var_5292_begin_0, end = var_5292_end_0, end_mask = var_5292_end_mask_0, x = var_5033_cast_fp16)[name = tensor<string, []>("op_5292_cast_fp16")];
+            tensor<int32, [4]> var_5299_begin_0 = const()[name = tensor<string, []>("op_5299_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5299_end_0 = const()[name = tensor<string, []>("op_5299_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5299_end_mask_0 = const()[name = tensor<string, []>("op_5299_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5299_cast_fp16 = slice_by_index(begin = var_5299_begin_0, end = var_5299_end_0, end_mask = var_5299_end_mask_0, x = var_5033_cast_fp16)[name = tensor<string, []>("op_5299_cast_fp16")];
+            tensor<int32, [4]> var_5306_begin_0 = const()[name = tensor<string, []>("op_5306_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5306_end_0 = const()[name = tensor<string, []>("op_5306_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5306_end_mask_0 = const()[name = tensor<string, []>("op_5306_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5306_cast_fp16 = slice_by_index(begin = var_5306_begin_0, end = var_5306_end_0, end_mask = var_5306_end_mask_0, x = var_5037_cast_fp16)[name = tensor<string, []>("op_5306_cast_fp16")];
+            tensor<int32, [4]> var_5313_begin_0 = const()[name = tensor<string, []>("op_5313_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5313_end_0 = const()[name = tensor<string, []>("op_5313_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5313_end_mask_0 = const()[name = tensor<string, []>("op_5313_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5313_cast_fp16 = slice_by_index(begin = var_5313_begin_0, end = var_5313_end_0, end_mask = var_5313_end_mask_0, x = var_5037_cast_fp16)[name = tensor<string, []>("op_5313_cast_fp16")];
+            tensor<int32, [4]> var_5320_begin_0 = const()[name = tensor<string, []>("op_5320_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5320_end_0 = const()[name = tensor<string, []>("op_5320_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5320_end_mask_0 = const()[name = tensor<string, []>("op_5320_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5320_cast_fp16 = slice_by_index(begin = var_5320_begin_0, end = var_5320_end_0, end_mask = var_5320_end_mask_0, x = var_5037_cast_fp16)[name = tensor<string, []>("op_5320_cast_fp16")];
+            tensor<int32, [4]> var_5327_begin_0 = const()[name = tensor<string, []>("op_5327_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5327_end_0 = const()[name = tensor<string, []>("op_5327_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5327_end_mask_0 = const()[name = tensor<string, []>("op_5327_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5327_cast_fp16 = slice_by_index(begin = var_5327_begin_0, end = var_5327_end_0, end_mask = var_5327_end_mask_0, x = var_5037_cast_fp16)[name = tensor<string, []>("op_5327_cast_fp16")];
+            tensor<int32, [4]> var_5334_begin_0 = const()[name = tensor<string, []>("op_5334_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5334_end_0 = const()[name = tensor<string, []>("op_5334_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5334_end_mask_0 = const()[name = tensor<string, []>("op_5334_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5334_cast_fp16 = slice_by_index(begin = var_5334_begin_0, end = var_5334_end_0, end_mask = var_5334_end_mask_0, x = var_5041_cast_fp16)[name = tensor<string, []>("op_5334_cast_fp16")];
+            tensor<int32, [4]> var_5341_begin_0 = const()[name = tensor<string, []>("op_5341_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5341_end_0 = const()[name = tensor<string, []>("op_5341_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5341_end_mask_0 = const()[name = tensor<string, []>("op_5341_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5341_cast_fp16 = slice_by_index(begin = var_5341_begin_0, end = var_5341_end_0, end_mask = var_5341_end_mask_0, x = var_5041_cast_fp16)[name = tensor<string, []>("op_5341_cast_fp16")];
+            tensor<int32, [4]> var_5348_begin_0 = const()[name = tensor<string, []>("op_5348_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5348_end_0 = const()[name = tensor<string, []>("op_5348_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5348_end_mask_0 = const()[name = tensor<string, []>("op_5348_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5348_cast_fp16 = slice_by_index(begin = var_5348_begin_0, end = var_5348_end_0, end_mask = var_5348_end_mask_0, x = var_5041_cast_fp16)[name = tensor<string, []>("op_5348_cast_fp16")];
+            tensor<int32, [4]> var_5355_begin_0 = const()[name = tensor<string, []>("op_5355_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5355_end_0 = const()[name = tensor<string, []>("op_5355_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5355_end_mask_0 = const()[name = tensor<string, []>("op_5355_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5355_cast_fp16 = slice_by_index(begin = var_5355_begin_0, end = var_5355_end_0, end_mask = var_5355_end_mask_0, x = var_5041_cast_fp16)[name = tensor<string, []>("op_5355_cast_fp16")];
+            tensor<int32, [4]> var_5362_begin_0 = const()[name = tensor<string, []>("op_5362_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5362_end_0 = const()[name = tensor<string, []>("op_5362_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_5362_end_mask_0 = const()[name = tensor<string, []>("op_5362_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5362_cast_fp16 = slice_by_index(begin = var_5362_begin_0, end = var_5362_end_0, end_mask = var_5362_end_mask_0, x = var_5045_cast_fp16)[name = tensor<string, []>("op_5362_cast_fp16")];
+            tensor<int32, [4]> var_5369_begin_0 = const()[name = tensor<string, []>("op_5369_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_5369_end_0 = const()[name = tensor<string, []>("op_5369_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_5369_end_mask_0 = const()[name = tensor<string, []>("op_5369_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5369_cast_fp16 = slice_by_index(begin = var_5369_begin_0, end = var_5369_end_0, end_mask = var_5369_end_mask_0, x = var_5045_cast_fp16)[name = tensor<string, []>("op_5369_cast_fp16")];
+            tensor<int32, [4]> var_5376_begin_0 = const()[name = tensor<string, []>("op_5376_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_5376_end_0 = const()[name = tensor<string, []>("op_5376_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_5376_end_mask_0 = const()[name = tensor<string, []>("op_5376_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5376_cast_fp16 = slice_by_index(begin = var_5376_begin_0, end = var_5376_end_0, end_mask = var_5376_end_mask_0, x = var_5045_cast_fp16)[name = tensor<string, []>("op_5376_cast_fp16")];
+            tensor<int32, [4]> var_5383_begin_0 = const()[name = tensor<string, []>("op_5383_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_5383_end_0 = const()[name = tensor<string, []>("op_5383_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5383_end_mask_0 = const()[name = tensor<string, []>("op_5383_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_5383_cast_fp16 = slice_by_index(begin = var_5383_begin_0, end = var_5383_end_0, end_mask = var_5383_end_mask_0, x = var_5045_cast_fp16)[name = tensor<string, []>("op_5383_cast_fp16")];
+            tensor<int32, [4]> k_11_perm_0 = const()[name = tensor<string, []>("k_11_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_5388_begin_0 = const()[name = tensor<string, []>("op_5388_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5388_end_0 = const()[name = tensor<string, []>("op_5388_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_5388_end_mask_0 = const()[name = tensor<string, []>("op_5388_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_6 = transpose(perm = k_11_perm_0, x = key_11_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5388_cast_fp16 = slice_by_index(begin = var_5388_begin_0, end = var_5388_end_0, end_mask = var_5388_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5388_cast_fp16")];
+            tensor<int32, [4]> var_5392_begin_0 = const()[name = tensor<string, []>("op_5392_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_5392_end_0 = const()[name = tensor<string, []>("op_5392_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_5392_end_mask_0 = const()[name = tensor<string, []>("op_5392_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16 = slice_by_index(begin = var_5392_begin_0, end = var_5392_end_0, end_mask = var_5392_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5392_cast_fp16")];
+            tensor<int32, [4]> var_5396_begin_0 = const()[name = tensor<string, []>("op_5396_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_5396_end_0 = const()[name = tensor<string, []>("op_5396_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_5396_end_mask_0 = const()[name = tensor<string, []>("op_5396_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5396_cast_fp16 = slice_by_index(begin = var_5396_begin_0, end = var_5396_end_0, end_mask = var_5396_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5396_cast_fp16")];
+            tensor<int32, [4]> var_5400_begin_0 = const()[name = tensor<string, []>("op_5400_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_5400_end_0 = const()[name = tensor<string, []>("op_5400_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_5400_end_mask_0 = const()[name = tensor<string, []>("op_5400_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5400_cast_fp16 = slice_by_index(begin = var_5400_begin_0, end = var_5400_end_0, end_mask = var_5400_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5400_cast_fp16")];
+            tensor<int32, [4]> var_5404_begin_0 = const()[name = tensor<string, []>("op_5404_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_5404_end_0 = const()[name = tensor<string, []>("op_5404_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_5404_end_mask_0 = const()[name = tensor<string, []>("op_5404_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5404_cast_fp16 = slice_by_index(begin = var_5404_begin_0, end = var_5404_end_0, end_mask = var_5404_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5404_cast_fp16")];
+            tensor<int32, [4]> var_5408_begin_0 = const()[name = tensor<string, []>("op_5408_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_5408_end_0 = const()[name = tensor<string, []>("op_5408_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_5408_end_mask_0 = const()[name = tensor<string, []>("op_5408_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5408_cast_fp16 = slice_by_index(begin = var_5408_begin_0, end = var_5408_end_0, end_mask = var_5408_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5408_cast_fp16")];
+            tensor<int32, [4]> var_5412_begin_0 = const()[name = tensor<string, []>("op_5412_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_5412_end_0 = const()[name = tensor<string, []>("op_5412_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_5412_end_mask_0 = const()[name = tensor<string, []>("op_5412_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5412_cast_fp16 = slice_by_index(begin = var_5412_begin_0, end = var_5412_end_0, end_mask = var_5412_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5412_cast_fp16")];
+            tensor<int32, [4]> var_5416_begin_0 = const()[name = tensor<string, []>("op_5416_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_5416_end_0 = const()[name = tensor<string, []>("op_5416_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_5416_end_mask_0 = const()[name = tensor<string, []>("op_5416_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5416_cast_fp16 = slice_by_index(begin = var_5416_begin_0, end = var_5416_end_0, end_mask = var_5416_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5416_cast_fp16")];
+            tensor<int32, [4]> var_5420_begin_0 = const()[name = tensor<string, []>("op_5420_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_5420_end_0 = const()[name = tensor<string, []>("op_5420_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_5420_end_mask_0 = const()[name = tensor<string, []>("op_5420_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5420_cast_fp16 = slice_by_index(begin = var_5420_begin_0, end = var_5420_end_0, end_mask = var_5420_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5420_cast_fp16")];
+            tensor<int32, [4]> var_5424_begin_0 = const()[name = tensor<string, []>("op_5424_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_5424_end_0 = const()[name = tensor<string, []>("op_5424_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_5424_end_mask_0 = const()[name = tensor<string, []>("op_5424_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5424_cast_fp16 = slice_by_index(begin = var_5424_begin_0, end = var_5424_end_0, end_mask = var_5424_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5424_cast_fp16")];
+            tensor<int32, [4]> var_5428_begin_0 = const()[name = tensor<string, []>("op_5428_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_5428_end_0 = const()[name = tensor<string, []>("op_5428_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_5428_end_mask_0 = const()[name = tensor<string, []>("op_5428_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5428_cast_fp16 = slice_by_index(begin = var_5428_begin_0, end = var_5428_end_0, end_mask = var_5428_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5428_cast_fp16")];
+            tensor<int32, [4]> var_5432_begin_0 = const()[name = tensor<string, []>("op_5432_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_5432_end_0 = const()[name = tensor<string, []>("op_5432_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_5432_end_mask_0 = const()[name = tensor<string, []>("op_5432_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_5432_cast_fp16 = slice_by_index(begin = var_5432_begin_0, end = var_5432_end_0, end_mask = var_5432_end_mask_0, x = transpose_6)[name = tensor<string, []>("op_5432_cast_fp16")];
+            tensor<int32, [4]> var_5434_begin_0 = const()[name = tensor<string, []>("op_5434_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5434_end_0 = const()[name = tensor<string, []>("op_5434_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5434_end_mask_0 = const()[name = tensor<string, []>("op_5434_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5434_cast_fp16 = slice_by_index(begin = var_5434_begin_0, end = var_5434_end_0, end_mask = var_5434_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5434_cast_fp16")];
+            tensor<int32, [4]> var_5438_begin_0 = const()[name = tensor<string, []>("op_5438_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_5438_end_0 = const()[name = tensor<string, []>("op_5438_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_5438_end_mask_0 = const()[name = tensor<string, []>("op_5438_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5438_cast_fp16 = slice_by_index(begin = var_5438_begin_0, end = var_5438_end_0, end_mask = var_5438_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5438_cast_fp16")];
+            tensor<int32, [4]> var_5442_begin_0 = const()[name = tensor<string, []>("op_5442_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_5442_end_0 = const()[name = tensor<string, []>("op_5442_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_5442_end_mask_0 = const()[name = tensor<string, []>("op_5442_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5442_cast_fp16 = slice_by_index(begin = var_5442_begin_0, end = var_5442_end_0, end_mask = var_5442_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5442_cast_fp16")];
+            tensor<int32, [4]> var_5446_begin_0 = const()[name = tensor<string, []>("op_5446_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_5446_end_0 = const()[name = tensor<string, []>("op_5446_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_5446_end_mask_0 = const()[name = tensor<string, []>("op_5446_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5446_cast_fp16 = slice_by_index(begin = var_5446_begin_0, end = var_5446_end_0, end_mask = var_5446_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5446_cast_fp16")];
+            tensor<int32, [4]> var_5450_begin_0 = const()[name = tensor<string, []>("op_5450_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_5450_end_0 = const()[name = tensor<string, []>("op_5450_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_5450_end_mask_0 = const()[name = tensor<string, []>("op_5450_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5450_cast_fp16 = slice_by_index(begin = var_5450_begin_0, end = var_5450_end_0, end_mask = var_5450_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5450_cast_fp16")];
+            tensor<int32, [4]> var_5454_begin_0 = const()[name = tensor<string, []>("op_5454_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_5454_end_0 = const()[name = tensor<string, []>("op_5454_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_5454_end_mask_0 = const()[name = tensor<string, []>("op_5454_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5454_cast_fp16 = slice_by_index(begin = var_5454_begin_0, end = var_5454_end_0, end_mask = var_5454_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5454_cast_fp16")];
+            tensor<int32, [4]> var_5458_begin_0 = const()[name = tensor<string, []>("op_5458_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_5458_end_0 = const()[name = tensor<string, []>("op_5458_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_5458_end_mask_0 = const()[name = tensor<string, []>("op_5458_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5458_cast_fp16 = slice_by_index(begin = var_5458_begin_0, end = var_5458_end_0, end_mask = var_5458_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5458_cast_fp16")];
+            tensor<int32, [4]> var_5462_begin_0 = const()[name = tensor<string, []>("op_5462_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_5462_end_0 = const()[name = tensor<string, []>("op_5462_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_5462_end_mask_0 = const()[name = tensor<string, []>("op_5462_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5462_cast_fp16 = slice_by_index(begin = var_5462_begin_0, end = var_5462_end_0, end_mask = var_5462_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5462_cast_fp16")];
+            tensor<int32, [4]> var_5466_begin_0 = const()[name = tensor<string, []>("op_5466_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_5466_end_0 = const()[name = tensor<string, []>("op_5466_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_5466_end_mask_0 = const()[name = tensor<string, []>("op_5466_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5466_cast_fp16 = slice_by_index(begin = var_5466_begin_0, end = var_5466_end_0, end_mask = var_5466_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5466_cast_fp16")];
+            tensor<int32, [4]> var_5470_begin_0 = const()[name = tensor<string, []>("op_5470_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_5470_end_0 = const()[name = tensor<string, []>("op_5470_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_5470_end_mask_0 = const()[name = tensor<string, []>("op_5470_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5470_cast_fp16 = slice_by_index(begin = var_5470_begin_0, end = var_5470_end_0, end_mask = var_5470_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5470_cast_fp16")];
+            tensor<int32, [4]> var_5474_begin_0 = const()[name = tensor<string, []>("op_5474_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_5474_end_0 = const()[name = tensor<string, []>("op_5474_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_5474_end_mask_0 = const()[name = tensor<string, []>("op_5474_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5474_cast_fp16 = slice_by_index(begin = var_5474_begin_0, end = var_5474_end_0, end_mask = var_5474_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5474_cast_fp16")];
+            tensor<int32, [4]> var_5478_begin_0 = const()[name = tensor<string, []>("op_5478_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_5478_end_0 = const()[name = tensor<string, []>("op_5478_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_5478_end_mask_0 = const()[name = tensor<string, []>("op_5478_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5478_cast_fp16 = slice_by_index(begin = var_5478_begin_0, end = var_5478_end_0, end_mask = var_5478_end_mask_0, x = value_11_cast_fp16)[name = tensor<string, []>("op_5478_cast_fp16")];
+            tensor<string, []> var_5482_equation_0 = const()[name = tensor<string, []>("op_5482_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5482_cast_fp16 = einsum(equation = var_5482_equation_0, values = (var_5388_cast_fp16, var_5054_cast_fp16))[name = tensor<string, []>("op_5482_cast_fp16")];
+            tensor<fp16, []> var_5483_to_fp16 = const()[name = tensor<string, []>("op_5483_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_481_cast_fp16 = mul(x = var_5482_cast_fp16, y = var_5483_to_fp16)[name = tensor<string, []>("aw_chunk_481_cast_fp16")];
+            tensor<string, []> var_5486_equation_0 = const()[name = tensor<string, []>("op_5486_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5486_cast_fp16 = einsum(equation = var_5486_equation_0, values = (var_5388_cast_fp16, var_5061_cast_fp16))[name = tensor<string, []>("op_5486_cast_fp16")];
+            tensor<fp16, []> var_5487_to_fp16 = const()[name = tensor<string, []>("op_5487_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_483_cast_fp16 = mul(x = var_5486_cast_fp16, y = var_5487_to_fp16)[name = tensor<string, []>("aw_chunk_483_cast_fp16")];
+            tensor<string, []> var_5490_equation_0 = const()[name = tensor<string, []>("op_5490_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5490_cast_fp16 = einsum(equation = var_5490_equation_0, values = (var_5388_cast_fp16, var_5068_cast_fp16))[name = tensor<string, []>("op_5490_cast_fp16")];
+            tensor<fp16, []> var_5491_to_fp16 = const()[name = tensor<string, []>("op_5491_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_485_cast_fp16 = mul(x = var_5490_cast_fp16, y = var_5491_to_fp16)[name = tensor<string, []>("aw_chunk_485_cast_fp16")];
+            tensor<string, []> var_5494_equation_0 = const()[name = tensor<string, []>("op_5494_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5494_cast_fp16 = einsum(equation = var_5494_equation_0, values = (var_5388_cast_fp16, var_5075_cast_fp16))[name = tensor<string, []>("op_5494_cast_fp16")];
+            tensor<fp16, []> var_5495_to_fp16 = const()[name = tensor<string, []>("op_5495_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_487_cast_fp16 = mul(x = var_5494_cast_fp16, y = var_5495_to_fp16)[name = tensor<string, []>("aw_chunk_487_cast_fp16")];
+            tensor<string, []> var_5498_equation_0 = const()[name = tensor<string, []>("op_5498_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5498_cast_fp16 = einsum(equation = var_5498_equation_0, values = (var_5392_cast_fp16, var_5082_cast_fp16))[name = tensor<string, []>("op_5498_cast_fp16")];
+            tensor<fp16, []> var_5499_to_fp16 = const()[name = tensor<string, []>("op_5499_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_489_cast_fp16 = mul(x = var_5498_cast_fp16, y = var_5499_to_fp16)[name = tensor<string, []>("aw_chunk_489_cast_fp16")];
+            tensor<string, []> var_5502_equation_0 = const()[name = tensor<string, []>("op_5502_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5502_cast_fp16 = einsum(equation = var_5502_equation_0, values = (var_5392_cast_fp16, var_5089_cast_fp16))[name = tensor<string, []>("op_5502_cast_fp16")];
+            tensor<fp16, []> var_5503_to_fp16 = const()[name = tensor<string, []>("op_5503_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_491_cast_fp16 = mul(x = var_5502_cast_fp16, y = var_5503_to_fp16)[name = tensor<string, []>("aw_chunk_491_cast_fp16")];
+            tensor<string, []> var_5506_equation_0 = const()[name = tensor<string, []>("op_5506_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5506_cast_fp16 = einsum(equation = var_5506_equation_0, values = (var_5392_cast_fp16, var_5096_cast_fp16))[name = tensor<string, []>("op_5506_cast_fp16")];
+            tensor<fp16, []> var_5507_to_fp16 = const()[name = tensor<string, []>("op_5507_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_493_cast_fp16 = mul(x = var_5506_cast_fp16, y = var_5507_to_fp16)[name = tensor<string, []>("aw_chunk_493_cast_fp16")];
+            tensor<string, []> var_5510_equation_0 = const()[name = tensor<string, []>("op_5510_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5510_cast_fp16 = einsum(equation = var_5510_equation_0, values = (var_5392_cast_fp16, var_5103_cast_fp16))[name = tensor<string, []>("op_5510_cast_fp16")];
+            tensor<fp16, []> var_5511_to_fp16 = const()[name = tensor<string, []>("op_5511_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_495_cast_fp16 = mul(x = var_5510_cast_fp16, y = var_5511_to_fp16)[name = tensor<string, []>("aw_chunk_495_cast_fp16")];
+            tensor<string, []> var_5514_equation_0 = const()[name = tensor<string, []>("op_5514_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5514_cast_fp16 = einsum(equation = var_5514_equation_0, values = (var_5396_cast_fp16, var_5110_cast_fp16))[name = tensor<string, []>("op_5514_cast_fp16")];
+            tensor<fp16, []> var_5515_to_fp16 = const()[name = tensor<string, []>("op_5515_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_497_cast_fp16 = mul(x = var_5514_cast_fp16, y = var_5515_to_fp16)[name = tensor<string, []>("aw_chunk_497_cast_fp16")];
+            tensor<string, []> var_5518_equation_0 = const()[name = tensor<string, []>("op_5518_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5518_cast_fp16 = einsum(equation = var_5518_equation_0, values = (var_5396_cast_fp16, var_5117_cast_fp16))[name = tensor<string, []>("op_5518_cast_fp16")];
+            tensor<fp16, []> var_5519_to_fp16 = const()[name = tensor<string, []>("op_5519_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_499_cast_fp16 = mul(x = var_5518_cast_fp16, y = var_5519_to_fp16)[name = tensor<string, []>("aw_chunk_499_cast_fp16")];
+            tensor<string, []> var_5522_equation_0 = const()[name = tensor<string, []>("op_5522_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5522_cast_fp16 = einsum(equation = var_5522_equation_0, values = (var_5396_cast_fp16, var_5124_cast_fp16))[name = tensor<string, []>("op_5522_cast_fp16")];
+            tensor<fp16, []> var_5523_to_fp16 = const()[name = tensor<string, []>("op_5523_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_501_cast_fp16 = mul(x = var_5522_cast_fp16, y = var_5523_to_fp16)[name = tensor<string, []>("aw_chunk_501_cast_fp16")];
+            tensor<string, []> var_5526_equation_0 = const()[name = tensor<string, []>("op_5526_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5526_cast_fp16 = einsum(equation = var_5526_equation_0, values = (var_5396_cast_fp16, var_5131_cast_fp16))[name = tensor<string, []>("op_5526_cast_fp16")];
+            tensor<fp16, []> var_5527_to_fp16 = const()[name = tensor<string, []>("op_5527_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_503_cast_fp16 = mul(x = var_5526_cast_fp16, y = var_5527_to_fp16)[name = tensor<string, []>("aw_chunk_503_cast_fp16")];
+            tensor<string, []> var_5530_equation_0 = const()[name = tensor<string, []>("op_5530_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5530_cast_fp16 = einsum(equation = var_5530_equation_0, values = (var_5400_cast_fp16, var_5138_cast_fp16))[name = tensor<string, []>("op_5530_cast_fp16")];
+            tensor<fp16, []> var_5531_to_fp16 = const()[name = tensor<string, []>("op_5531_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_505_cast_fp16 = mul(x = var_5530_cast_fp16, y = var_5531_to_fp16)[name = tensor<string, []>("aw_chunk_505_cast_fp16")];
+            tensor<string, []> var_5534_equation_0 = const()[name = tensor<string, []>("op_5534_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5534_cast_fp16 = einsum(equation = var_5534_equation_0, values = (var_5400_cast_fp16, var_5145_cast_fp16))[name = tensor<string, []>("op_5534_cast_fp16")];
+            tensor<fp16, []> var_5535_to_fp16 = const()[name = tensor<string, []>("op_5535_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_507_cast_fp16 = mul(x = var_5534_cast_fp16, y = var_5535_to_fp16)[name = tensor<string, []>("aw_chunk_507_cast_fp16")];
+            tensor<string, []> var_5538_equation_0 = const()[name = tensor<string, []>("op_5538_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5538_cast_fp16 = einsum(equation = var_5538_equation_0, values = (var_5400_cast_fp16, var_5152_cast_fp16))[name = tensor<string, []>("op_5538_cast_fp16")];
+            tensor<fp16, []> var_5539_to_fp16 = const()[name = tensor<string, []>("op_5539_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_509_cast_fp16 = mul(x = var_5538_cast_fp16, y = var_5539_to_fp16)[name = tensor<string, []>("aw_chunk_509_cast_fp16")];
+            tensor<string, []> var_5542_equation_0 = const()[name = tensor<string, []>("op_5542_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5542_cast_fp16 = einsum(equation = var_5542_equation_0, values = (var_5400_cast_fp16, var_5159_cast_fp16))[name = tensor<string, []>("op_5542_cast_fp16")];
+            tensor<fp16, []> var_5543_to_fp16 = const()[name = tensor<string, []>("op_5543_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_511_cast_fp16 = mul(x = var_5542_cast_fp16, y = var_5543_to_fp16)[name = tensor<string, []>("aw_chunk_511_cast_fp16")];
+            tensor<string, []> var_5546_equation_0 = const()[name = tensor<string, []>("op_5546_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5546_cast_fp16 = einsum(equation = var_5546_equation_0, values = (var_5404_cast_fp16, var_5166_cast_fp16))[name = tensor<string, []>("op_5546_cast_fp16")];
+            tensor<fp16, []> var_5547_to_fp16 = const()[name = tensor<string, []>("op_5547_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_513_cast_fp16 = mul(x = var_5546_cast_fp16, y = var_5547_to_fp16)[name = tensor<string, []>("aw_chunk_513_cast_fp16")];
+            tensor<string, []> var_5550_equation_0 = const()[name = tensor<string, []>("op_5550_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5550_cast_fp16 = einsum(equation = var_5550_equation_0, values = (var_5404_cast_fp16, var_5173_cast_fp16))[name = tensor<string, []>("op_5550_cast_fp16")];
+            tensor<fp16, []> var_5551_to_fp16 = const()[name = tensor<string, []>("op_5551_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_515_cast_fp16 = mul(x = var_5550_cast_fp16, y = var_5551_to_fp16)[name = tensor<string, []>("aw_chunk_515_cast_fp16")];
+            tensor<string, []> var_5554_equation_0 = const()[name = tensor<string, []>("op_5554_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5554_cast_fp16 = einsum(equation = var_5554_equation_0, values = (var_5404_cast_fp16, var_5180_cast_fp16))[name = tensor<string, []>("op_5554_cast_fp16")];
+            tensor<fp16, []> var_5555_to_fp16 = const()[name = tensor<string, []>("op_5555_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_517_cast_fp16 = mul(x = var_5554_cast_fp16, y = var_5555_to_fp16)[name = tensor<string, []>("aw_chunk_517_cast_fp16")];
+            tensor<string, []> var_5558_equation_0 = const()[name = tensor<string, []>("op_5558_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5558_cast_fp16 = einsum(equation = var_5558_equation_0, values = (var_5404_cast_fp16, var_5187_cast_fp16))[name = tensor<string, []>("op_5558_cast_fp16")];
+            tensor<fp16, []> var_5559_to_fp16 = const()[name = tensor<string, []>("op_5559_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_519_cast_fp16 = mul(x = var_5558_cast_fp16, y = var_5559_to_fp16)[name = tensor<string, []>("aw_chunk_519_cast_fp16")];
+            tensor<string, []> var_5562_equation_0 = const()[name = tensor<string, []>("op_5562_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5562_cast_fp16 = einsum(equation = var_5562_equation_0, values = (var_5408_cast_fp16, var_5194_cast_fp16))[name = tensor<string, []>("op_5562_cast_fp16")];
+            tensor<fp16, []> var_5563_to_fp16 = const()[name = tensor<string, []>("op_5563_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_521_cast_fp16 = mul(x = var_5562_cast_fp16, y = var_5563_to_fp16)[name = tensor<string, []>("aw_chunk_521_cast_fp16")];
+            tensor<string, []> var_5566_equation_0 = const()[name = tensor<string, []>("op_5566_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5566_cast_fp16 = einsum(equation = var_5566_equation_0, values = (var_5408_cast_fp16, var_5201_cast_fp16))[name = tensor<string, []>("op_5566_cast_fp16")];
+            tensor<fp16, []> var_5567_to_fp16 = const()[name = tensor<string, []>("op_5567_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_523_cast_fp16 = mul(x = var_5566_cast_fp16, y = var_5567_to_fp16)[name = tensor<string, []>("aw_chunk_523_cast_fp16")];
+            tensor<string, []> var_5570_equation_0 = const()[name = tensor<string, []>("op_5570_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5570_cast_fp16 = einsum(equation = var_5570_equation_0, values = (var_5408_cast_fp16, var_5208_cast_fp16))[name = tensor<string, []>("op_5570_cast_fp16")];
+            tensor<fp16, []> var_5571_to_fp16 = const()[name = tensor<string, []>("op_5571_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_525_cast_fp16 = mul(x = var_5570_cast_fp16, y = var_5571_to_fp16)[name = tensor<string, []>("aw_chunk_525_cast_fp16")];
+            tensor<string, []> var_5574_equation_0 = const()[name = tensor<string, []>("op_5574_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5574_cast_fp16 = einsum(equation = var_5574_equation_0, values = (var_5408_cast_fp16, var_5215_cast_fp16))[name = tensor<string, []>("op_5574_cast_fp16")];
+            tensor<fp16, []> var_5575_to_fp16 = const()[name = tensor<string, []>("op_5575_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_527_cast_fp16 = mul(x = var_5574_cast_fp16, y = var_5575_to_fp16)[name = tensor<string, []>("aw_chunk_527_cast_fp16")];
+            tensor<string, []> var_5578_equation_0 = const()[name = tensor<string, []>("op_5578_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5578_cast_fp16 = einsum(equation = var_5578_equation_0, values = (var_5412_cast_fp16, var_5222_cast_fp16))[name = tensor<string, []>("op_5578_cast_fp16")];
+            tensor<fp16, []> var_5579_to_fp16 = const()[name = tensor<string, []>("op_5579_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_529_cast_fp16 = mul(x = var_5578_cast_fp16, y = var_5579_to_fp16)[name = tensor<string, []>("aw_chunk_529_cast_fp16")];
+            tensor<string, []> var_5582_equation_0 = const()[name = tensor<string, []>("op_5582_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5582_cast_fp16 = einsum(equation = var_5582_equation_0, values = (var_5412_cast_fp16, var_5229_cast_fp16))[name = tensor<string, []>("op_5582_cast_fp16")];
+            tensor<fp16, []> var_5583_to_fp16 = const()[name = tensor<string, []>("op_5583_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_531_cast_fp16 = mul(x = var_5582_cast_fp16, y = var_5583_to_fp16)[name = tensor<string, []>("aw_chunk_531_cast_fp16")];
+            tensor<string, []> var_5586_equation_0 = const()[name = tensor<string, []>("op_5586_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5586_cast_fp16 = einsum(equation = var_5586_equation_0, values = (var_5412_cast_fp16, var_5236_cast_fp16))[name = tensor<string, []>("op_5586_cast_fp16")];
+            tensor<fp16, []> var_5587_to_fp16 = const()[name = tensor<string, []>("op_5587_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_533_cast_fp16 = mul(x = var_5586_cast_fp16, y = var_5587_to_fp16)[name = tensor<string, []>("aw_chunk_533_cast_fp16")];
+            tensor<string, []> var_5590_equation_0 = const()[name = tensor<string, []>("op_5590_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5590_cast_fp16 = einsum(equation = var_5590_equation_0, values = (var_5412_cast_fp16, var_5243_cast_fp16))[name = tensor<string, []>("op_5590_cast_fp16")];
+            tensor<fp16, []> var_5591_to_fp16 = const()[name = tensor<string, []>("op_5591_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_535_cast_fp16 = mul(x = var_5590_cast_fp16, y = var_5591_to_fp16)[name = tensor<string, []>("aw_chunk_535_cast_fp16")];
+            tensor<string, []> var_5594_equation_0 = const()[name = tensor<string, []>("op_5594_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5594_cast_fp16 = einsum(equation = var_5594_equation_0, values = (var_5416_cast_fp16, var_5250_cast_fp16))[name = tensor<string, []>("op_5594_cast_fp16")];
+            tensor<fp16, []> var_5595_to_fp16 = const()[name = tensor<string, []>("op_5595_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_537_cast_fp16 = mul(x = var_5594_cast_fp16, y = var_5595_to_fp16)[name = tensor<string, []>("aw_chunk_537_cast_fp16")];
+            tensor<string, []> var_5598_equation_0 = const()[name = tensor<string, []>("op_5598_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5598_cast_fp16 = einsum(equation = var_5598_equation_0, values = (var_5416_cast_fp16, var_5257_cast_fp16))[name = tensor<string, []>("op_5598_cast_fp16")];
+            tensor<fp16, []> var_5599_to_fp16 = const()[name = tensor<string, []>("op_5599_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_539_cast_fp16 = mul(x = var_5598_cast_fp16, y = var_5599_to_fp16)[name = tensor<string, []>("aw_chunk_539_cast_fp16")];
+            tensor<string, []> var_5602_equation_0 = const()[name = tensor<string, []>("op_5602_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5602_cast_fp16 = einsum(equation = var_5602_equation_0, values = (var_5416_cast_fp16, var_5264_cast_fp16))[name = tensor<string, []>("op_5602_cast_fp16")];
+            tensor<fp16, []> var_5603_to_fp16 = const()[name = tensor<string, []>("op_5603_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_541_cast_fp16 = mul(x = var_5602_cast_fp16, y = var_5603_to_fp16)[name = tensor<string, []>("aw_chunk_541_cast_fp16")];
+            tensor<string, []> var_5606_equation_0 = const()[name = tensor<string, []>("op_5606_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5606_cast_fp16 = einsum(equation = var_5606_equation_0, values = (var_5416_cast_fp16, var_5271_cast_fp16))[name = tensor<string, []>("op_5606_cast_fp16")];
+            tensor<fp16, []> var_5607_to_fp16 = const()[name = tensor<string, []>("op_5607_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_543_cast_fp16 = mul(x = var_5606_cast_fp16, y = var_5607_to_fp16)[name = tensor<string, []>("aw_chunk_543_cast_fp16")];
+            tensor<string, []> var_5610_equation_0 = const()[name = tensor<string, []>("op_5610_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5610_cast_fp16 = einsum(equation = var_5610_equation_0, values = (var_5420_cast_fp16, var_5278_cast_fp16))[name = tensor<string, []>("op_5610_cast_fp16")];
+            tensor<fp16, []> var_5611_to_fp16 = const()[name = tensor<string, []>("op_5611_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_545_cast_fp16 = mul(x = var_5610_cast_fp16, y = var_5611_to_fp16)[name = tensor<string, []>("aw_chunk_545_cast_fp16")];
+            tensor<string, []> var_5614_equation_0 = const()[name = tensor<string, []>("op_5614_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5614_cast_fp16 = einsum(equation = var_5614_equation_0, values = (var_5420_cast_fp16, var_5285_cast_fp16))[name = tensor<string, []>("op_5614_cast_fp16")];
+            tensor<fp16, []> var_5615_to_fp16 = const()[name = tensor<string, []>("op_5615_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_547_cast_fp16 = mul(x = var_5614_cast_fp16, y = var_5615_to_fp16)[name = tensor<string, []>("aw_chunk_547_cast_fp16")];
+            tensor<string, []> var_5618_equation_0 = const()[name = tensor<string, []>("op_5618_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5618_cast_fp16 = einsum(equation = var_5618_equation_0, values = (var_5420_cast_fp16, var_5292_cast_fp16))[name = tensor<string, []>("op_5618_cast_fp16")];
+            tensor<fp16, []> var_5619_to_fp16 = const()[name = tensor<string, []>("op_5619_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_549_cast_fp16 = mul(x = var_5618_cast_fp16, y = var_5619_to_fp16)[name = tensor<string, []>("aw_chunk_549_cast_fp16")];
+            tensor<string, []> var_5622_equation_0 = const()[name = tensor<string, []>("op_5622_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5622_cast_fp16 = einsum(equation = var_5622_equation_0, values = (var_5420_cast_fp16, var_5299_cast_fp16))[name = tensor<string, []>("op_5622_cast_fp16")];
+            tensor<fp16, []> var_5623_to_fp16 = const()[name = tensor<string, []>("op_5623_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_551_cast_fp16 = mul(x = var_5622_cast_fp16, y = var_5623_to_fp16)[name = tensor<string, []>("aw_chunk_551_cast_fp16")];
+            tensor<string, []> var_5626_equation_0 = const()[name = tensor<string, []>("op_5626_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5626_cast_fp16 = einsum(equation = var_5626_equation_0, values = (var_5424_cast_fp16, var_5306_cast_fp16))[name = tensor<string, []>("op_5626_cast_fp16")];
+            tensor<fp16, []> var_5627_to_fp16 = const()[name = tensor<string, []>("op_5627_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_553_cast_fp16 = mul(x = var_5626_cast_fp16, y = var_5627_to_fp16)[name = tensor<string, []>("aw_chunk_553_cast_fp16")];
+            tensor<string, []> var_5630_equation_0 = const()[name = tensor<string, []>("op_5630_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5630_cast_fp16 = einsum(equation = var_5630_equation_0, values = (var_5424_cast_fp16, var_5313_cast_fp16))[name = tensor<string, []>("op_5630_cast_fp16")];
+            tensor<fp16, []> var_5631_to_fp16 = const()[name = tensor<string, []>("op_5631_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_555_cast_fp16 = mul(x = var_5630_cast_fp16, y = var_5631_to_fp16)[name = tensor<string, []>("aw_chunk_555_cast_fp16")];
+            tensor<string, []> var_5634_equation_0 = const()[name = tensor<string, []>("op_5634_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5634_cast_fp16 = einsum(equation = var_5634_equation_0, values = (var_5424_cast_fp16, var_5320_cast_fp16))[name = tensor<string, []>("op_5634_cast_fp16")];
+            tensor<fp16, []> var_5635_to_fp16 = const()[name = tensor<string, []>("op_5635_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_557_cast_fp16 = mul(x = var_5634_cast_fp16, y = var_5635_to_fp16)[name = tensor<string, []>("aw_chunk_557_cast_fp16")];
+            tensor<string, []> var_5638_equation_0 = const()[name = tensor<string, []>("op_5638_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5638_cast_fp16 = einsum(equation = var_5638_equation_0, values = (var_5424_cast_fp16, var_5327_cast_fp16))[name = tensor<string, []>("op_5638_cast_fp16")];
+            tensor<fp16, []> var_5639_to_fp16 = const()[name = tensor<string, []>("op_5639_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_559_cast_fp16 = mul(x = var_5638_cast_fp16, y = var_5639_to_fp16)[name = tensor<string, []>("aw_chunk_559_cast_fp16")];
+            tensor<string, []> var_5642_equation_0 = const()[name = tensor<string, []>("op_5642_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5642_cast_fp16 = einsum(equation = var_5642_equation_0, values = (var_5428_cast_fp16, var_5334_cast_fp16))[name = tensor<string, []>("op_5642_cast_fp16")];
+            tensor<fp16, []> var_5643_to_fp16 = const()[name = tensor<string, []>("op_5643_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_561_cast_fp16 = mul(x = var_5642_cast_fp16, y = var_5643_to_fp16)[name = tensor<string, []>("aw_chunk_561_cast_fp16")];
+            tensor<string, []> var_5646_equation_0 = const()[name = tensor<string, []>("op_5646_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5646_cast_fp16 = einsum(equation = var_5646_equation_0, values = (var_5428_cast_fp16, var_5341_cast_fp16))[name = tensor<string, []>("op_5646_cast_fp16")];
+            tensor<fp16, []> var_5647_to_fp16 = const()[name = tensor<string, []>("op_5647_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_563_cast_fp16 = mul(x = var_5646_cast_fp16, y = var_5647_to_fp16)[name = tensor<string, []>("aw_chunk_563_cast_fp16")];
+            tensor<string, []> var_5650_equation_0 = const()[name = tensor<string, []>("op_5650_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5650_cast_fp16 = einsum(equation = var_5650_equation_0, values = (var_5428_cast_fp16, var_5348_cast_fp16))[name = tensor<string, []>("op_5650_cast_fp16")];
+            tensor<fp16, []> var_5651_to_fp16 = const()[name = tensor<string, []>("op_5651_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_565_cast_fp16 = mul(x = var_5650_cast_fp16, y = var_5651_to_fp16)[name = tensor<string, []>("aw_chunk_565_cast_fp16")];
+            tensor<string, []> var_5654_equation_0 = const()[name = tensor<string, []>("op_5654_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5654_cast_fp16 = einsum(equation = var_5654_equation_0, values = (var_5428_cast_fp16, var_5355_cast_fp16))[name = tensor<string, []>("op_5654_cast_fp16")];
+            tensor<fp16, []> var_5655_to_fp16 = const()[name = tensor<string, []>("op_5655_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_567_cast_fp16 = mul(x = var_5654_cast_fp16, y = var_5655_to_fp16)[name = tensor<string, []>("aw_chunk_567_cast_fp16")];
+            tensor<string, []> var_5658_equation_0 = const()[name = tensor<string, []>("op_5658_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5658_cast_fp16 = einsum(equation = var_5658_equation_0, values = (var_5432_cast_fp16, var_5362_cast_fp16))[name = tensor<string, []>("op_5658_cast_fp16")];
+            tensor<fp16, []> var_5659_to_fp16 = const()[name = tensor<string, []>("op_5659_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_569_cast_fp16 = mul(x = var_5658_cast_fp16, y = var_5659_to_fp16)[name = tensor<string, []>("aw_chunk_569_cast_fp16")];
+            tensor<string, []> var_5662_equation_0 = const()[name = tensor<string, []>("op_5662_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5662_cast_fp16 = einsum(equation = var_5662_equation_0, values = (var_5432_cast_fp16, var_5369_cast_fp16))[name = tensor<string, []>("op_5662_cast_fp16")];
+            tensor<fp16, []> var_5663_to_fp16 = const()[name = tensor<string, []>("op_5663_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_571_cast_fp16 = mul(x = var_5662_cast_fp16, y = var_5663_to_fp16)[name = tensor<string, []>("aw_chunk_571_cast_fp16")];
+            tensor<string, []> var_5666_equation_0 = const()[name = tensor<string, []>("op_5666_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5666_cast_fp16 = einsum(equation = var_5666_equation_0, values = (var_5432_cast_fp16, var_5376_cast_fp16))[name = tensor<string, []>("op_5666_cast_fp16")];
+            tensor<fp16, []> var_5667_to_fp16 = const()[name = tensor<string, []>("op_5667_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_573_cast_fp16 = mul(x = var_5666_cast_fp16, y = var_5667_to_fp16)[name = tensor<string, []>("aw_chunk_573_cast_fp16")];
+            tensor<string, []> var_5670_equation_0 = const()[name = tensor<string, []>("op_5670_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5670_cast_fp16 = einsum(equation = var_5670_equation_0, values = (var_5432_cast_fp16, var_5383_cast_fp16))[name = tensor<string, []>("op_5670_cast_fp16")];
+            tensor<fp16, []> var_5671_to_fp16 = const()[name = tensor<string, []>("op_5671_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_575_cast_fp16 = mul(x = var_5670_cast_fp16, y = var_5671_to_fp16)[name = tensor<string, []>("aw_chunk_575_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5673_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_481_cast_fp16)[name = tensor<string, []>("op_5673_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5674_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_483_cast_fp16)[name = tensor<string, []>("op_5674_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5675_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_485_cast_fp16)[name = tensor<string, []>("op_5675_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5676_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_487_cast_fp16)[name = tensor<string, []>("op_5676_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5677_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_489_cast_fp16)[name = tensor<string, []>("op_5677_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5678_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_491_cast_fp16)[name = tensor<string, []>("op_5678_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5679_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_493_cast_fp16)[name = tensor<string, []>("op_5679_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5680_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_495_cast_fp16)[name = tensor<string, []>("op_5680_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5681_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_497_cast_fp16)[name = tensor<string, []>("op_5681_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5682_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_499_cast_fp16)[name = tensor<string, []>("op_5682_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5683_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_501_cast_fp16)[name = tensor<string, []>("op_5683_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5684_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_503_cast_fp16)[name = tensor<string, []>("op_5684_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5685_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_505_cast_fp16)[name = tensor<string, []>("op_5685_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5686_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_507_cast_fp16)[name = tensor<string, []>("op_5686_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5687_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_509_cast_fp16)[name = tensor<string, []>("op_5687_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5688_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_511_cast_fp16)[name = tensor<string, []>("op_5688_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5689_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_513_cast_fp16)[name = tensor<string, []>("op_5689_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5690_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_515_cast_fp16)[name = tensor<string, []>("op_5690_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5691_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_517_cast_fp16)[name = tensor<string, []>("op_5691_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5692_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_519_cast_fp16)[name = tensor<string, []>("op_5692_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5693_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_521_cast_fp16)[name = tensor<string, []>("op_5693_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5694_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_523_cast_fp16)[name = tensor<string, []>("op_5694_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5695_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_525_cast_fp16)[name = tensor<string, []>("op_5695_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5696_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_527_cast_fp16)[name = tensor<string, []>("op_5696_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5697_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_529_cast_fp16)[name = tensor<string, []>("op_5697_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5698_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_531_cast_fp16)[name = tensor<string, []>("op_5698_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5699_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_533_cast_fp16)[name = tensor<string, []>("op_5699_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5700_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_535_cast_fp16)[name = tensor<string, []>("op_5700_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5701_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_537_cast_fp16)[name = tensor<string, []>("op_5701_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5702_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_539_cast_fp16)[name = tensor<string, []>("op_5702_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5703_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_541_cast_fp16)[name = tensor<string, []>("op_5703_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5704_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_543_cast_fp16)[name = tensor<string, []>("op_5704_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5705_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_545_cast_fp16)[name = tensor<string, []>("op_5705_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5706_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_547_cast_fp16)[name = tensor<string, []>("op_5706_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5707_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_549_cast_fp16)[name = tensor<string, []>("op_5707_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5708_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_551_cast_fp16)[name = tensor<string, []>("op_5708_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5709_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_553_cast_fp16)[name = tensor<string, []>("op_5709_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5710_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_555_cast_fp16)[name = tensor<string, []>("op_5710_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5711_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_557_cast_fp16)[name = tensor<string, []>("op_5711_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5712_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_559_cast_fp16)[name = tensor<string, []>("op_5712_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5713_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_561_cast_fp16)[name = tensor<string, []>("op_5713_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5714_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_563_cast_fp16)[name = tensor<string, []>("op_5714_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5715_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_565_cast_fp16)[name = tensor<string, []>("op_5715_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5716_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_567_cast_fp16)[name = tensor<string, []>("op_5716_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5717_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_569_cast_fp16)[name = tensor<string, []>("op_5717_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5718_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_571_cast_fp16)[name = tensor<string, []>("op_5718_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5719_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_573_cast_fp16)[name = tensor<string, []>("op_5719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_5720_cast_fp16 = softmax(axis = var_4946, x = aw_chunk_575_cast_fp16)[name = tensor<string, []>("op_5720_cast_fp16")];
+            tensor<string, []> var_5722_equation_0 = const()[name = tensor<string, []>("op_5722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5722_cast_fp16 = einsum(equation = var_5722_equation_0, values = (var_5434_cast_fp16, var_5673_cast_fp16))[name = tensor<string, []>("op_5722_cast_fp16")];
+            tensor<string, []> var_5724_equation_0 = const()[name = tensor<string, []>("op_5724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5724_cast_fp16 = einsum(equation = var_5724_equation_0, values = (var_5434_cast_fp16, var_5674_cast_fp16))[name = tensor<string, []>("op_5724_cast_fp16")];
+            tensor<string, []> var_5726_equation_0 = const()[name = tensor<string, []>("op_5726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5726_cast_fp16 = einsum(equation = var_5726_equation_0, values = (var_5434_cast_fp16, var_5675_cast_fp16))[name = tensor<string, []>("op_5726_cast_fp16")];
+            tensor<string, []> var_5728_equation_0 = const()[name = tensor<string, []>("op_5728_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5728_cast_fp16 = einsum(equation = var_5728_equation_0, values = (var_5434_cast_fp16, var_5676_cast_fp16))[name = tensor<string, []>("op_5728_cast_fp16")];
+            tensor<string, []> var_5730_equation_0 = const()[name = tensor<string, []>("op_5730_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5730_cast_fp16 = einsum(equation = var_5730_equation_0, values = (var_5438_cast_fp16, var_5677_cast_fp16))[name = tensor<string, []>("op_5730_cast_fp16")];
+            tensor<string, []> var_5732_equation_0 = const()[name = tensor<string, []>("op_5732_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5732_cast_fp16 = einsum(equation = var_5732_equation_0, values = (var_5438_cast_fp16, var_5678_cast_fp16))[name = tensor<string, []>("op_5732_cast_fp16")];
+            tensor<string, []> var_5734_equation_0 = const()[name = tensor<string, []>("op_5734_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5734_cast_fp16 = einsum(equation = var_5734_equation_0, values = (var_5438_cast_fp16, var_5679_cast_fp16))[name = tensor<string, []>("op_5734_cast_fp16")];
+            tensor<string, []> var_5736_equation_0 = const()[name = tensor<string, []>("op_5736_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5736_cast_fp16 = einsum(equation = var_5736_equation_0, values = (var_5438_cast_fp16, var_5680_cast_fp16))[name = tensor<string, []>("op_5736_cast_fp16")];
+            tensor<string, []> var_5738_equation_0 = const()[name = tensor<string, []>("op_5738_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5738_cast_fp16 = einsum(equation = var_5738_equation_0, values = (var_5442_cast_fp16, var_5681_cast_fp16))[name = tensor<string, []>("op_5738_cast_fp16")];
+            tensor<string, []> var_5740_equation_0 = const()[name = tensor<string, []>("op_5740_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5740_cast_fp16 = einsum(equation = var_5740_equation_0, values = (var_5442_cast_fp16, var_5682_cast_fp16))[name = tensor<string, []>("op_5740_cast_fp16")];
+            tensor<string, []> var_5742_equation_0 = const()[name = tensor<string, []>("op_5742_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5742_cast_fp16 = einsum(equation = var_5742_equation_0, values = (var_5442_cast_fp16, var_5683_cast_fp16))[name = tensor<string, []>("op_5742_cast_fp16")];
+            tensor<string, []> var_5744_equation_0 = const()[name = tensor<string, []>("op_5744_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5744_cast_fp16 = einsum(equation = var_5744_equation_0, values = (var_5442_cast_fp16, var_5684_cast_fp16))[name = tensor<string, []>("op_5744_cast_fp16")];
+            tensor<string, []> var_5746_equation_0 = const()[name = tensor<string, []>("op_5746_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5746_cast_fp16 = einsum(equation = var_5746_equation_0, values = (var_5446_cast_fp16, var_5685_cast_fp16))[name = tensor<string, []>("op_5746_cast_fp16")];
+            tensor<string, []> var_5748_equation_0 = const()[name = tensor<string, []>("op_5748_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5748_cast_fp16 = einsum(equation = var_5748_equation_0, values = (var_5446_cast_fp16, var_5686_cast_fp16))[name = tensor<string, []>("op_5748_cast_fp16")];
+            tensor<string, []> var_5750_equation_0 = const()[name = tensor<string, []>("op_5750_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5750_cast_fp16 = einsum(equation = var_5750_equation_0, values = (var_5446_cast_fp16, var_5687_cast_fp16))[name = tensor<string, []>("op_5750_cast_fp16")];
+            tensor<string, []> var_5752_equation_0 = const()[name = tensor<string, []>("op_5752_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5752_cast_fp16 = einsum(equation = var_5752_equation_0, values = (var_5446_cast_fp16, var_5688_cast_fp16))[name = tensor<string, []>("op_5752_cast_fp16")];
+            tensor<string, []> var_5754_equation_0 = const()[name = tensor<string, []>("op_5754_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5754_cast_fp16 = einsum(equation = var_5754_equation_0, values = (var_5450_cast_fp16, var_5689_cast_fp16))[name = tensor<string, []>("op_5754_cast_fp16")];
+            tensor<string, []> var_5756_equation_0 = const()[name = tensor<string, []>("op_5756_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5756_cast_fp16 = einsum(equation = var_5756_equation_0, values = (var_5450_cast_fp16, var_5690_cast_fp16))[name = tensor<string, []>("op_5756_cast_fp16")];
+            tensor<string, []> var_5758_equation_0 = const()[name = tensor<string, []>("op_5758_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5758_cast_fp16 = einsum(equation = var_5758_equation_0, values = (var_5450_cast_fp16, var_5691_cast_fp16))[name = tensor<string, []>("op_5758_cast_fp16")];
+            tensor<string, []> var_5760_equation_0 = const()[name = tensor<string, []>("op_5760_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5760_cast_fp16 = einsum(equation = var_5760_equation_0, values = (var_5450_cast_fp16, var_5692_cast_fp16))[name = tensor<string, []>("op_5760_cast_fp16")];
+            tensor<string, []> var_5762_equation_0 = const()[name = tensor<string, []>("op_5762_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5762_cast_fp16 = einsum(equation = var_5762_equation_0, values = (var_5454_cast_fp16, var_5693_cast_fp16))[name = tensor<string, []>("op_5762_cast_fp16")];
+            tensor<string, []> var_5764_equation_0 = const()[name = tensor<string, []>("op_5764_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5764_cast_fp16 = einsum(equation = var_5764_equation_0, values = (var_5454_cast_fp16, var_5694_cast_fp16))[name = tensor<string, []>("op_5764_cast_fp16")];
+            tensor<string, []> var_5766_equation_0 = const()[name = tensor<string, []>("op_5766_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5766_cast_fp16 = einsum(equation = var_5766_equation_0, values = (var_5454_cast_fp16, var_5695_cast_fp16))[name = tensor<string, []>("op_5766_cast_fp16")];
+            tensor<string, []> var_5768_equation_0 = const()[name = tensor<string, []>("op_5768_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5768_cast_fp16 = einsum(equation = var_5768_equation_0, values = (var_5454_cast_fp16, var_5696_cast_fp16))[name = tensor<string, []>("op_5768_cast_fp16")];
+            tensor<string, []> var_5770_equation_0 = const()[name = tensor<string, []>("op_5770_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5770_cast_fp16 = einsum(equation = var_5770_equation_0, values = (var_5458_cast_fp16, var_5697_cast_fp16))[name = tensor<string, []>("op_5770_cast_fp16")];
+            tensor<string, []> var_5772_equation_0 = const()[name = tensor<string, []>("op_5772_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5772_cast_fp16 = einsum(equation = var_5772_equation_0, values = (var_5458_cast_fp16, var_5698_cast_fp16))[name = tensor<string, []>("op_5772_cast_fp16")];
+            tensor<string, []> var_5774_equation_0 = const()[name = tensor<string, []>("op_5774_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5774_cast_fp16 = einsum(equation = var_5774_equation_0, values = (var_5458_cast_fp16, var_5699_cast_fp16))[name = tensor<string, []>("op_5774_cast_fp16")];
+            tensor<string, []> var_5776_equation_0 = const()[name = tensor<string, []>("op_5776_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5776_cast_fp16 = einsum(equation = var_5776_equation_0, values = (var_5458_cast_fp16, var_5700_cast_fp16))[name = tensor<string, []>("op_5776_cast_fp16")];
+            tensor<string, []> var_5778_equation_0 = const()[name = tensor<string, []>("op_5778_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5778_cast_fp16 = einsum(equation = var_5778_equation_0, values = (var_5462_cast_fp16, var_5701_cast_fp16))[name = tensor<string, []>("op_5778_cast_fp16")];
+            tensor<string, []> var_5780_equation_0 = const()[name = tensor<string, []>("op_5780_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5780_cast_fp16 = einsum(equation = var_5780_equation_0, values = (var_5462_cast_fp16, var_5702_cast_fp16))[name = tensor<string, []>("op_5780_cast_fp16")];
+            tensor<string, []> var_5782_equation_0 = const()[name = tensor<string, []>("op_5782_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5782_cast_fp16 = einsum(equation = var_5782_equation_0, values = (var_5462_cast_fp16, var_5703_cast_fp16))[name = tensor<string, []>("op_5782_cast_fp16")];
+            tensor<string, []> var_5784_equation_0 = const()[name = tensor<string, []>("op_5784_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5784_cast_fp16 = einsum(equation = var_5784_equation_0, values = (var_5462_cast_fp16, var_5704_cast_fp16))[name = tensor<string, []>("op_5784_cast_fp16")];
+            tensor<string, []> var_5786_equation_0 = const()[name = tensor<string, []>("op_5786_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5786_cast_fp16 = einsum(equation = var_5786_equation_0, values = (var_5466_cast_fp16, var_5705_cast_fp16))[name = tensor<string, []>("op_5786_cast_fp16")];
+            tensor<string, []> var_5788_equation_0 = const()[name = tensor<string, []>("op_5788_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5788_cast_fp16 = einsum(equation = var_5788_equation_0, values = (var_5466_cast_fp16, var_5706_cast_fp16))[name = tensor<string, []>("op_5788_cast_fp16")];
+            tensor<string, []> var_5790_equation_0 = const()[name = tensor<string, []>("op_5790_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5790_cast_fp16 = einsum(equation = var_5790_equation_0, values = (var_5466_cast_fp16, var_5707_cast_fp16))[name = tensor<string, []>("op_5790_cast_fp16")];
+            tensor<string, []> var_5792_equation_0 = const()[name = tensor<string, []>("op_5792_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5792_cast_fp16 = einsum(equation = var_5792_equation_0, values = (var_5466_cast_fp16, var_5708_cast_fp16))[name = tensor<string, []>("op_5792_cast_fp16")];
+            tensor<string, []> var_5794_equation_0 = const()[name = tensor<string, []>("op_5794_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5794_cast_fp16 = einsum(equation = var_5794_equation_0, values = (var_5470_cast_fp16, var_5709_cast_fp16))[name = tensor<string, []>("op_5794_cast_fp16")];
+            tensor<string, []> var_5796_equation_0 = const()[name = tensor<string, []>("op_5796_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5796_cast_fp16 = einsum(equation = var_5796_equation_0, values = (var_5470_cast_fp16, var_5710_cast_fp16))[name = tensor<string, []>("op_5796_cast_fp16")];
+            tensor<string, []> var_5798_equation_0 = const()[name = tensor<string, []>("op_5798_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5798_cast_fp16 = einsum(equation = var_5798_equation_0, values = (var_5470_cast_fp16, var_5711_cast_fp16))[name = tensor<string, []>("op_5798_cast_fp16")];
+            tensor<string, []> var_5800_equation_0 = const()[name = tensor<string, []>("op_5800_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5800_cast_fp16 = einsum(equation = var_5800_equation_0, values = (var_5470_cast_fp16, var_5712_cast_fp16))[name = tensor<string, []>("op_5800_cast_fp16")];
+            tensor<string, []> var_5802_equation_0 = const()[name = tensor<string, []>("op_5802_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5802_cast_fp16 = einsum(equation = var_5802_equation_0, values = (var_5474_cast_fp16, var_5713_cast_fp16))[name = tensor<string, []>("op_5802_cast_fp16")];
+            tensor<string, []> var_5804_equation_0 = const()[name = tensor<string, []>("op_5804_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5804_cast_fp16 = einsum(equation = var_5804_equation_0, values = (var_5474_cast_fp16, var_5714_cast_fp16))[name = tensor<string, []>("op_5804_cast_fp16")];
+            tensor<string, []> var_5806_equation_0 = const()[name = tensor<string, []>("op_5806_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5806_cast_fp16 = einsum(equation = var_5806_equation_0, values = (var_5474_cast_fp16, var_5715_cast_fp16))[name = tensor<string, []>("op_5806_cast_fp16")];
+            tensor<string, []> var_5808_equation_0 = const()[name = tensor<string, []>("op_5808_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5808_cast_fp16 = einsum(equation = var_5808_equation_0, values = (var_5474_cast_fp16, var_5716_cast_fp16))[name = tensor<string, []>("op_5808_cast_fp16")];
+            tensor<string, []> var_5810_equation_0 = const()[name = tensor<string, []>("op_5810_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5810_cast_fp16 = einsum(equation = var_5810_equation_0, values = (var_5478_cast_fp16, var_5717_cast_fp16))[name = tensor<string, []>("op_5810_cast_fp16")];
+            tensor<string, []> var_5812_equation_0 = const()[name = tensor<string, []>("op_5812_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5812_cast_fp16 = einsum(equation = var_5812_equation_0, values = (var_5478_cast_fp16, var_5718_cast_fp16))[name = tensor<string, []>("op_5812_cast_fp16")];
+            tensor<string, []> var_5814_equation_0 = const()[name = tensor<string, []>("op_5814_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5814_cast_fp16 = einsum(equation = var_5814_equation_0, values = (var_5478_cast_fp16, var_5719_cast_fp16))[name = tensor<string, []>("op_5814_cast_fp16")];
+            tensor<string, []> var_5816_equation_0 = const()[name = tensor<string, []>("op_5816_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_5816_cast_fp16 = einsum(equation = var_5816_equation_0, values = (var_5478_cast_fp16, var_5720_cast_fp16))[name = tensor<string, []>("op_5816_cast_fp16")];
+            tensor<bool, []> var_5818_interleave_0 = const()[name = tensor<string, []>("op_5818_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5818_cast_fp16 = concat(axis = var_4929, interleave = var_5818_interleave_0, values = (var_5722_cast_fp16, var_5724_cast_fp16, var_5726_cast_fp16, var_5728_cast_fp16))[name = tensor<string, []>("op_5818_cast_fp16")];
+            tensor<bool, []> var_5820_interleave_0 = const()[name = tensor<string, []>("op_5820_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5820_cast_fp16 = concat(axis = var_4929, interleave = var_5820_interleave_0, values = (var_5730_cast_fp16, var_5732_cast_fp16, var_5734_cast_fp16, var_5736_cast_fp16))[name = tensor<string, []>("op_5820_cast_fp16")];
+            tensor<bool, []> var_5822_interleave_0 = const()[name = tensor<string, []>("op_5822_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5822_cast_fp16 = concat(axis = var_4929, interleave = var_5822_interleave_0, values = (var_5738_cast_fp16, var_5740_cast_fp16, var_5742_cast_fp16, var_5744_cast_fp16))[name = tensor<string, []>("op_5822_cast_fp16")];
+            tensor<bool, []> var_5824_interleave_0 = const()[name = tensor<string, []>("op_5824_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5824_cast_fp16 = concat(axis = var_4929, interleave = var_5824_interleave_0, values = (var_5746_cast_fp16, var_5748_cast_fp16, var_5750_cast_fp16, var_5752_cast_fp16))[name = tensor<string, []>("op_5824_cast_fp16")];
+            tensor<bool, []> var_5826_interleave_0 = const()[name = tensor<string, []>("op_5826_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5826_cast_fp16 = concat(axis = var_4929, interleave = var_5826_interleave_0, values = (var_5754_cast_fp16, var_5756_cast_fp16, var_5758_cast_fp16, var_5760_cast_fp16))[name = tensor<string, []>("op_5826_cast_fp16")];
+            tensor<bool, []> var_5828_interleave_0 = const()[name = tensor<string, []>("op_5828_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5828_cast_fp16 = concat(axis = var_4929, interleave = var_5828_interleave_0, values = (var_5762_cast_fp16, var_5764_cast_fp16, var_5766_cast_fp16, var_5768_cast_fp16))[name = tensor<string, []>("op_5828_cast_fp16")];
+            tensor<bool, []> var_5830_interleave_0 = const()[name = tensor<string, []>("op_5830_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5830_cast_fp16 = concat(axis = var_4929, interleave = var_5830_interleave_0, values = (var_5770_cast_fp16, var_5772_cast_fp16, var_5774_cast_fp16, var_5776_cast_fp16))[name = tensor<string, []>("op_5830_cast_fp16")];
+            tensor<bool, []> var_5832_interleave_0 = const()[name = tensor<string, []>("op_5832_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5832_cast_fp16 = concat(axis = var_4929, interleave = var_5832_interleave_0, values = (var_5778_cast_fp16, var_5780_cast_fp16, var_5782_cast_fp16, var_5784_cast_fp16))[name = tensor<string, []>("op_5832_cast_fp16")];
+            tensor<bool, []> var_5834_interleave_0 = const()[name = tensor<string, []>("op_5834_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5834_cast_fp16 = concat(axis = var_4929, interleave = var_5834_interleave_0, values = (var_5786_cast_fp16, var_5788_cast_fp16, var_5790_cast_fp16, var_5792_cast_fp16))[name = tensor<string, []>("op_5834_cast_fp16")];
+            tensor<bool, []> var_5836_interleave_0 = const()[name = tensor<string, []>("op_5836_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5836_cast_fp16 = concat(axis = var_4929, interleave = var_5836_interleave_0, values = (var_5794_cast_fp16, var_5796_cast_fp16, var_5798_cast_fp16, var_5800_cast_fp16))[name = tensor<string, []>("op_5836_cast_fp16")];
+            tensor<bool, []> var_5838_interleave_0 = const()[name = tensor<string, []>("op_5838_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5838_cast_fp16 = concat(axis = var_4929, interleave = var_5838_interleave_0, values = (var_5802_cast_fp16, var_5804_cast_fp16, var_5806_cast_fp16, var_5808_cast_fp16))[name = tensor<string, []>("op_5838_cast_fp16")];
+            tensor<bool, []> var_5840_interleave_0 = const()[name = tensor<string, []>("op_5840_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5840_cast_fp16 = concat(axis = var_4929, interleave = var_5840_interleave_0, values = (var_5810_cast_fp16, var_5812_cast_fp16, var_5814_cast_fp16, var_5816_cast_fp16))[name = tensor<string, []>("op_5840_cast_fp16")];
+            tensor<bool, []> input_41_interleave_0 = const()[name = tensor<string, []>("input_41_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_41_cast_fp16 = concat(axis = var_4946, interleave = input_41_interleave_0, values = (var_5818_cast_fp16, var_5820_cast_fp16, var_5822_cast_fp16, var_5824_cast_fp16, var_5826_cast_fp16, var_5828_cast_fp16, var_5830_cast_fp16, var_5832_cast_fp16, var_5834_cast_fp16, var_5836_cast_fp16, var_5838_cast_fp16, var_5840_cast_fp16))[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<int32, [2]> var_5845 = const()[name = tensor<string, []>("op_5845"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_5847 = const()[name = tensor<string, []>("op_5847"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_23_pad_type_0 = const()[name = tensor<string, []>("obj_23_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_23_pad_0 = const()[name = tensor<string, []>("obj_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80639616)))];
+            tensor<fp16, [768]> layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81819328)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_23_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = var_5847, groups = var_4946, pad = obj_23_pad_0, pad_type = obj_23_pad_type_0, strides = var_5845, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> var_5853 = const()[name = tensor<string, []>("op_5853"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_23_cast_fp16 = reduce_mean(axes = var_5853, keep_dims = var_4947, x = inputs_23_cast_fp16)[name = tensor<string, []>("channels_mean_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_23_cast_fp16 = sub(x = inputs_23_cast_fp16, y = channels_mean_23_cast_fp16)[name = tensor<string, []>("zero_mean_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = zero_mean_23_cast_fp16)[name = tensor<string, []>("zero_mean_sq_23_cast_fp16")];
+            tensor<int32, [1]> var_5857 = const()[name = tensor<string, []>("op_5857"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_5858_cast_fp16 = reduce_mean(axes = var_5857, keep_dims = var_4947, x = zero_mean_sq_23_cast_fp16)[name = tensor<string, []>("op_5858_cast_fp16")];
+            tensor<fp16, []> var_5859_to_fp16 = const()[name = tensor<string, []>("op_5859_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_5860_cast_fp16 = add(x = var_5858_cast_fp16, y = var_5859_to_fp16)[name = tensor<string, []>("op_5860_cast_fp16")];
+            tensor<fp16, []> denom_23_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_23_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0_to_fp16, x = var_5860_cast_fp16)[name = tensor<string, []>("denom_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = denom_23_cast_fp16)[name = tensor<string, []>("out_23_cast_fp16")];
+            tensor<fp16, [768]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81820928)))];
+            tensor<fp16, [768]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81822528)))];
+            tensor<fp16, []> input_43_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_43_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<int32, [2]> var_5871 = const()[name = tensor<string, []>("op_5871"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_5873 = const()[name = tensor<string, []>("op_5873"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_45_pad_type_0 = const()[name = tensor<string, []>("input_45_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_45_pad_0 = const()[name = tensor<string, []>("input_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81824128)))];
+            tensor<fp16, [3072]> layers_5_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86542784)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_45_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = var_5873, groups = var_4946, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = var_5871, weight = layers_5_fc1_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> input_47_mode_0 = const()[name = tensor<string, []>("input_47_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<int32, [2]> var_5879 = const()[name = tensor<string, []>("op_5879"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_5881 = const()[name = tensor<string, []>("op_5881"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_15_pad_type_0 = const()[name = tensor<string, []>("hidden_states_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_15_pad_0 = const()[name = tensor<string, []>("hidden_states_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86548992)))];
+            tensor<fp16, [768]> layers_5_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91267648)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_15_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = var_5881, groups = var_4946, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = var_5879, weight = layers_5_fc2_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_5888 = const()[name = tensor<string, []>("op_5888"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_5905 = const()[name = tensor<string, []>("op_5905"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_5906 = const()[name = tensor<string, []>("op_5906"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_5916 = const()[name = tensor<string, []>("op_5916"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_25_cast_fp16 = reduce_mean(axes = var_5916, keep_dims = var_5906, x = inputs_25_cast_fp16)[name = tensor<string, []>("channels_mean_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_25_cast_fp16 = sub(x = inputs_25_cast_fp16, y = channels_mean_25_cast_fp16)[name = tensor<string, []>("zero_mean_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = zero_mean_25_cast_fp16)[name = tensor<string, []>("zero_mean_sq_25_cast_fp16")];
+            tensor<int32, [1]> var_5920 = const()[name = tensor<string, []>("op_5920"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_5921_cast_fp16 = reduce_mean(axes = var_5920, keep_dims = var_5906, x = zero_mean_sq_25_cast_fp16)[name = tensor<string, []>("op_5921_cast_fp16")];
+            tensor<fp16, []> var_5922_to_fp16 = const()[name = tensor<string, []>("op_5922_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_5923_cast_fp16 = add(x = var_5921_cast_fp16, y = var_5922_to_fp16)[name = tensor<string, []>("op_5923_cast_fp16")];
+            tensor<fp16, []> denom_25_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_25_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0_to_fp16, x = var_5923_cast_fp16)[name = tensor<string, []>("denom_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = denom_25_cast_fp16)[name = tensor<string, []>("out_25_cast_fp16")];
+            tensor<fp16, [768]> obj_25_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91269248)))];
+            tensor<fp16, [768]> obj_25_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91270848)))];
+            tensor<fp16, []> obj_25_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_25_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor<string, []>("obj_25_cast_fp16")];
+            tensor<int32, [2]> var_5938 = const()[name = tensor<string, []>("op_5938"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_5940 = const()[name = tensor<string, []>("op_5940"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_13_pad_type_0 = const()[name = tensor<string, []>("query_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_13_pad_0 = const()[name = tensor<string, []>("query_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91272448)))];
+            tensor<fp16, [768]> layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92452160)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_13_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_bias_to_fp16, dilations = var_5940, groups = var_5905, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = var_5938, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor<string, []>("query_13_cast_fp16")];
+            tensor<int32, [2]> var_5944 = const()[name = tensor<string, []>("op_5944"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_5946 = const()[name = tensor<string, []>("op_5946"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_13_pad_type_0 = const()[name = tensor<string, []>("key_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_13_pad_0 = const()[name = tensor<string, []>("key_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92453760)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_13_cast_fp16 = conv(dilations = var_5946, groups = var_5905, pad = key_13_pad_0, pad_type = key_13_pad_type_0, strides = var_5944, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor<string, []>("key_13_cast_fp16")];
+            tensor<int32, [2]> var_5951 = const()[name = tensor<string, []>("op_5951"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_5953 = const()[name = tensor<string, []>("op_5953"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_13_pad_type_0 = const()[name = tensor<string, []>("value_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_13_pad_0 = const()[name = tensor<string, []>("value_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93633472)))];
+            tensor<fp16, [768]> layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94813184)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_13_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_bias_to_fp16, dilations = var_5953, groups = var_5905, pad = value_13_pad_0, pad_type = value_13_pad_type_0, strides = var_5951, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor<string, []>("value_13_cast_fp16")];
+            tensor<int32, [4]> var_5960_begin_0 = const()[name = tensor<string, []>("op_5960_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_5960_end_0 = const()[name = tensor<string, []>("op_5960_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_5960_end_mask_0 = const()[name = tensor<string, []>("op_5960_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5960_cast_fp16 = slice_by_index(begin = var_5960_begin_0, end = var_5960_end_0, end_mask = var_5960_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5960_cast_fp16")];
+            tensor<int32, [4]> var_5964_begin_0 = const()[name = tensor<string, []>("op_5964_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_5964_end_0 = const()[name = tensor<string, []>("op_5964_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_5964_end_mask_0 = const()[name = tensor<string, []>("op_5964_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5964_cast_fp16 = slice_by_index(begin = var_5964_begin_0, end = var_5964_end_0, end_mask = var_5964_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5964_cast_fp16")];
+            tensor<int32, [4]> var_5968_begin_0 = const()[name = tensor<string, []>("op_5968_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_5968_end_0 = const()[name = tensor<string, []>("op_5968_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_5968_end_mask_0 = const()[name = tensor<string, []>("op_5968_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5968_cast_fp16 = slice_by_index(begin = var_5968_begin_0, end = var_5968_end_0, end_mask = var_5968_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5968_cast_fp16")];
+            tensor<int32, [4]> var_5972_begin_0 = const()[name = tensor<string, []>("op_5972_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_5972_end_0 = const()[name = tensor<string, []>("op_5972_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_5972_end_mask_0 = const()[name = tensor<string, []>("op_5972_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5972_cast_fp16 = slice_by_index(begin = var_5972_begin_0, end = var_5972_end_0, end_mask = var_5972_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5972_cast_fp16")];
+            tensor<int32, [4]> var_5976_begin_0 = const()[name = tensor<string, []>("op_5976_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_5976_end_0 = const()[name = tensor<string, []>("op_5976_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_5976_end_mask_0 = const()[name = tensor<string, []>("op_5976_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5976_cast_fp16 = slice_by_index(begin = var_5976_begin_0, end = var_5976_end_0, end_mask = var_5976_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5976_cast_fp16")];
+            tensor<int32, [4]> var_5980_begin_0 = const()[name = tensor<string, []>("op_5980_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_5980_end_0 = const()[name = tensor<string, []>("op_5980_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_5980_end_mask_0 = const()[name = tensor<string, []>("op_5980_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5980_cast_fp16 = slice_by_index(begin = var_5980_begin_0, end = var_5980_end_0, end_mask = var_5980_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5980_cast_fp16")];
+            tensor<int32, [4]> var_5984_begin_0 = const()[name = tensor<string, []>("op_5984_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_5984_end_0 = const()[name = tensor<string, []>("op_5984_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_5984_end_mask_0 = const()[name = tensor<string, []>("op_5984_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5984_cast_fp16 = slice_by_index(begin = var_5984_begin_0, end = var_5984_end_0, end_mask = var_5984_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5984_cast_fp16")];
+            tensor<int32, [4]> var_5988_begin_0 = const()[name = tensor<string, []>("op_5988_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_5988_end_0 = const()[name = tensor<string, []>("op_5988_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_5988_end_mask_0 = const()[name = tensor<string, []>("op_5988_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5988_cast_fp16 = slice_by_index(begin = var_5988_begin_0, end = var_5988_end_0, end_mask = var_5988_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5988_cast_fp16")];
+            tensor<int32, [4]> var_5992_begin_0 = const()[name = tensor<string, []>("op_5992_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_5992_end_0 = const()[name = tensor<string, []>("op_5992_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_5992_end_mask_0 = const()[name = tensor<string, []>("op_5992_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5992_cast_fp16 = slice_by_index(begin = var_5992_begin_0, end = var_5992_end_0, end_mask = var_5992_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5992_cast_fp16")];
+            tensor<int32, [4]> var_5996_begin_0 = const()[name = tensor<string, []>("op_5996_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_5996_end_0 = const()[name = tensor<string, []>("op_5996_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_5996_end_mask_0 = const()[name = tensor<string, []>("op_5996_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_5996_cast_fp16 = slice_by_index(begin = var_5996_begin_0, end = var_5996_end_0, end_mask = var_5996_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_5996_cast_fp16")];
+            tensor<int32, [4]> var_6000_begin_0 = const()[name = tensor<string, []>("op_6000_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_6000_end_0 = const()[name = tensor<string, []>("op_6000_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_6000_end_mask_0 = const()[name = tensor<string, []>("op_6000_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6000_cast_fp16 = slice_by_index(begin = var_6000_begin_0, end = var_6000_end_0, end_mask = var_6000_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_6000_cast_fp16")];
+            tensor<int32, [4]> var_6004_begin_0 = const()[name = tensor<string, []>("op_6004_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_6004_end_0 = const()[name = tensor<string, []>("op_6004_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_6004_end_mask_0 = const()[name = tensor<string, []>("op_6004_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6004_cast_fp16 = slice_by_index(begin = var_6004_begin_0, end = var_6004_end_0, end_mask = var_6004_end_mask_0, x = query_13_cast_fp16)[name = tensor<string, []>("op_6004_cast_fp16")];
+            tensor<int32, [4]> var_6013_begin_0 = const()[name = tensor<string, []>("op_6013_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6013_end_0 = const()[name = tensor<string, []>("op_6013_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6013_end_mask_0 = const()[name = tensor<string, []>("op_6013_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6013_cast_fp16 = slice_by_index(begin = var_6013_begin_0, end = var_6013_end_0, end_mask = var_6013_end_mask_0, x = var_5960_cast_fp16)[name = tensor<string, []>("op_6013_cast_fp16")];
+            tensor<int32, [4]> var_6020_begin_0 = const()[name = tensor<string, []>("op_6020_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6020_end_0 = const()[name = tensor<string, []>("op_6020_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6020_end_mask_0 = const()[name = tensor<string, []>("op_6020_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6020_cast_fp16 = slice_by_index(begin = var_6020_begin_0, end = var_6020_end_0, end_mask = var_6020_end_mask_0, x = var_5960_cast_fp16)[name = tensor<string, []>("op_6020_cast_fp16")];
+            tensor<int32, [4]> var_6027_begin_0 = const()[name = tensor<string, []>("op_6027_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6027_end_0 = const()[name = tensor<string, []>("op_6027_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6027_end_mask_0 = const()[name = tensor<string, []>("op_6027_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6027_cast_fp16 = slice_by_index(begin = var_6027_begin_0, end = var_6027_end_0, end_mask = var_6027_end_mask_0, x = var_5960_cast_fp16)[name = tensor<string, []>("op_6027_cast_fp16")];
+            tensor<int32, [4]> var_6034_begin_0 = const()[name = tensor<string, []>("op_6034_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6034_end_0 = const()[name = tensor<string, []>("op_6034_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6034_end_mask_0 = const()[name = tensor<string, []>("op_6034_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6034_cast_fp16 = slice_by_index(begin = var_6034_begin_0, end = var_6034_end_0, end_mask = var_6034_end_mask_0, x = var_5960_cast_fp16)[name = tensor<string, []>("op_6034_cast_fp16")];
+            tensor<int32, [4]> var_6041_begin_0 = const()[name = tensor<string, []>("op_6041_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6041_end_0 = const()[name = tensor<string, []>("op_6041_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6041_end_mask_0 = const()[name = tensor<string, []>("op_6041_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6041_cast_fp16 = slice_by_index(begin = var_6041_begin_0, end = var_6041_end_0, end_mask = var_6041_end_mask_0, x = var_5964_cast_fp16)[name = tensor<string, []>("op_6041_cast_fp16")];
+            tensor<int32, [4]> var_6048_begin_0 = const()[name = tensor<string, []>("op_6048_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6048_end_0 = const()[name = tensor<string, []>("op_6048_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6048_end_mask_0 = const()[name = tensor<string, []>("op_6048_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6048_cast_fp16 = slice_by_index(begin = var_6048_begin_0, end = var_6048_end_0, end_mask = var_6048_end_mask_0, x = var_5964_cast_fp16)[name = tensor<string, []>("op_6048_cast_fp16")];
+            tensor<int32, [4]> var_6055_begin_0 = const()[name = tensor<string, []>("op_6055_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6055_end_0 = const()[name = tensor<string, []>("op_6055_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6055_end_mask_0 = const()[name = tensor<string, []>("op_6055_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6055_cast_fp16 = slice_by_index(begin = var_6055_begin_0, end = var_6055_end_0, end_mask = var_6055_end_mask_0, x = var_5964_cast_fp16)[name = tensor<string, []>("op_6055_cast_fp16")];
+            tensor<int32, [4]> var_6062_begin_0 = const()[name = tensor<string, []>("op_6062_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6062_end_0 = const()[name = tensor<string, []>("op_6062_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6062_end_mask_0 = const()[name = tensor<string, []>("op_6062_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6062_cast_fp16 = slice_by_index(begin = var_6062_begin_0, end = var_6062_end_0, end_mask = var_6062_end_mask_0, x = var_5964_cast_fp16)[name = tensor<string, []>("op_6062_cast_fp16")];
+            tensor<int32, [4]> var_6069_begin_0 = const()[name = tensor<string, []>("op_6069_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6069_end_0 = const()[name = tensor<string, []>("op_6069_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6069_end_mask_0 = const()[name = tensor<string, []>("op_6069_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6069_cast_fp16 = slice_by_index(begin = var_6069_begin_0, end = var_6069_end_0, end_mask = var_6069_end_mask_0, x = var_5968_cast_fp16)[name = tensor<string, []>("op_6069_cast_fp16")];
+            tensor<int32, [4]> var_6076_begin_0 = const()[name = tensor<string, []>("op_6076_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6076_end_0 = const()[name = tensor<string, []>("op_6076_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6076_end_mask_0 = const()[name = tensor<string, []>("op_6076_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6076_cast_fp16 = slice_by_index(begin = var_6076_begin_0, end = var_6076_end_0, end_mask = var_6076_end_mask_0, x = var_5968_cast_fp16)[name = tensor<string, []>("op_6076_cast_fp16")];
+            tensor<int32, [4]> var_6083_begin_0 = const()[name = tensor<string, []>("op_6083_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6083_end_0 = const()[name = tensor<string, []>("op_6083_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6083_end_mask_0 = const()[name = tensor<string, []>("op_6083_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6083_cast_fp16 = slice_by_index(begin = var_6083_begin_0, end = var_6083_end_0, end_mask = var_6083_end_mask_0, x = var_5968_cast_fp16)[name = tensor<string, []>("op_6083_cast_fp16")];
+            tensor<int32, [4]> var_6090_begin_0 = const()[name = tensor<string, []>("op_6090_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6090_end_0 = const()[name = tensor<string, []>("op_6090_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6090_end_mask_0 = const()[name = tensor<string, []>("op_6090_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6090_cast_fp16 = slice_by_index(begin = var_6090_begin_0, end = var_6090_end_0, end_mask = var_6090_end_mask_0, x = var_5968_cast_fp16)[name = tensor<string, []>("op_6090_cast_fp16")];
+            tensor<int32, [4]> var_6097_begin_0 = const()[name = tensor<string, []>("op_6097_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6097_end_0 = const()[name = tensor<string, []>("op_6097_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6097_end_mask_0 = const()[name = tensor<string, []>("op_6097_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6097_cast_fp16 = slice_by_index(begin = var_6097_begin_0, end = var_6097_end_0, end_mask = var_6097_end_mask_0, x = var_5972_cast_fp16)[name = tensor<string, []>("op_6097_cast_fp16")];
+            tensor<int32, [4]> var_6104_begin_0 = const()[name = tensor<string, []>("op_6104_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6104_end_0 = const()[name = tensor<string, []>("op_6104_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6104_end_mask_0 = const()[name = tensor<string, []>("op_6104_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6104_cast_fp16 = slice_by_index(begin = var_6104_begin_0, end = var_6104_end_0, end_mask = var_6104_end_mask_0, x = var_5972_cast_fp16)[name = tensor<string, []>("op_6104_cast_fp16")];
+            tensor<int32, [4]> var_6111_begin_0 = const()[name = tensor<string, []>("op_6111_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6111_end_0 = const()[name = tensor<string, []>("op_6111_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6111_end_mask_0 = const()[name = tensor<string, []>("op_6111_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6111_cast_fp16 = slice_by_index(begin = var_6111_begin_0, end = var_6111_end_0, end_mask = var_6111_end_mask_0, x = var_5972_cast_fp16)[name = tensor<string, []>("op_6111_cast_fp16")];
+            tensor<int32, [4]> var_6118_begin_0 = const()[name = tensor<string, []>("op_6118_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6118_end_0 = const()[name = tensor<string, []>("op_6118_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6118_end_mask_0 = const()[name = tensor<string, []>("op_6118_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6118_cast_fp16 = slice_by_index(begin = var_6118_begin_0, end = var_6118_end_0, end_mask = var_6118_end_mask_0, x = var_5972_cast_fp16)[name = tensor<string, []>("op_6118_cast_fp16")];
+            tensor<int32, [4]> var_6125_begin_0 = const()[name = tensor<string, []>("op_6125_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6125_end_0 = const()[name = tensor<string, []>("op_6125_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6125_end_mask_0 = const()[name = tensor<string, []>("op_6125_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6125_cast_fp16 = slice_by_index(begin = var_6125_begin_0, end = var_6125_end_0, end_mask = var_6125_end_mask_0, x = var_5976_cast_fp16)[name = tensor<string, []>("op_6125_cast_fp16")];
+            tensor<int32, [4]> var_6132_begin_0 = const()[name = tensor<string, []>("op_6132_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6132_end_0 = const()[name = tensor<string, []>("op_6132_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6132_end_mask_0 = const()[name = tensor<string, []>("op_6132_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6132_cast_fp16 = slice_by_index(begin = var_6132_begin_0, end = var_6132_end_0, end_mask = var_6132_end_mask_0, x = var_5976_cast_fp16)[name = tensor<string, []>("op_6132_cast_fp16")];
+            tensor<int32, [4]> var_6139_begin_0 = const()[name = tensor<string, []>("op_6139_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6139_end_0 = const()[name = tensor<string, []>("op_6139_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6139_end_mask_0 = const()[name = tensor<string, []>("op_6139_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6139_cast_fp16 = slice_by_index(begin = var_6139_begin_0, end = var_6139_end_0, end_mask = var_6139_end_mask_0, x = var_5976_cast_fp16)[name = tensor<string, []>("op_6139_cast_fp16")];
+            tensor<int32, [4]> var_6146_begin_0 = const()[name = tensor<string, []>("op_6146_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6146_end_0 = const()[name = tensor<string, []>("op_6146_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6146_end_mask_0 = const()[name = tensor<string, []>("op_6146_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6146_cast_fp16 = slice_by_index(begin = var_6146_begin_0, end = var_6146_end_0, end_mask = var_6146_end_mask_0, x = var_5976_cast_fp16)[name = tensor<string, []>("op_6146_cast_fp16")];
+            tensor<int32, [4]> var_6153_begin_0 = const()[name = tensor<string, []>("op_6153_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6153_end_0 = const()[name = tensor<string, []>("op_6153_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6153_end_mask_0 = const()[name = tensor<string, []>("op_6153_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6153_cast_fp16 = slice_by_index(begin = var_6153_begin_0, end = var_6153_end_0, end_mask = var_6153_end_mask_0, x = var_5980_cast_fp16)[name = tensor<string, []>("op_6153_cast_fp16")];
+            tensor<int32, [4]> var_6160_begin_0 = const()[name = tensor<string, []>("op_6160_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6160_end_0 = const()[name = tensor<string, []>("op_6160_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6160_end_mask_0 = const()[name = tensor<string, []>("op_6160_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6160_cast_fp16 = slice_by_index(begin = var_6160_begin_0, end = var_6160_end_0, end_mask = var_6160_end_mask_0, x = var_5980_cast_fp16)[name = tensor<string, []>("op_6160_cast_fp16")];
+            tensor<int32, [4]> var_6167_begin_0 = const()[name = tensor<string, []>("op_6167_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6167_end_0 = const()[name = tensor<string, []>("op_6167_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6167_end_mask_0 = const()[name = tensor<string, []>("op_6167_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6167_cast_fp16 = slice_by_index(begin = var_6167_begin_0, end = var_6167_end_0, end_mask = var_6167_end_mask_0, x = var_5980_cast_fp16)[name = tensor<string, []>("op_6167_cast_fp16")];
+            tensor<int32, [4]> var_6174_begin_0 = const()[name = tensor<string, []>("op_6174_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6174_end_0 = const()[name = tensor<string, []>("op_6174_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6174_end_mask_0 = const()[name = tensor<string, []>("op_6174_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6174_cast_fp16 = slice_by_index(begin = var_6174_begin_0, end = var_6174_end_0, end_mask = var_6174_end_mask_0, x = var_5980_cast_fp16)[name = tensor<string, []>("op_6174_cast_fp16")];
+            tensor<int32, [4]> var_6181_begin_0 = const()[name = tensor<string, []>("op_6181_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6181_end_0 = const()[name = tensor<string, []>("op_6181_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6181_end_mask_0 = const()[name = tensor<string, []>("op_6181_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6181_cast_fp16 = slice_by_index(begin = var_6181_begin_0, end = var_6181_end_0, end_mask = var_6181_end_mask_0, x = var_5984_cast_fp16)[name = tensor<string, []>("op_6181_cast_fp16")];
+            tensor<int32, [4]> var_6188_begin_0 = const()[name = tensor<string, []>("op_6188_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6188_end_0 = const()[name = tensor<string, []>("op_6188_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6188_end_mask_0 = const()[name = tensor<string, []>("op_6188_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6188_cast_fp16 = slice_by_index(begin = var_6188_begin_0, end = var_6188_end_0, end_mask = var_6188_end_mask_0, x = var_5984_cast_fp16)[name = tensor<string, []>("op_6188_cast_fp16")];
+            tensor<int32, [4]> var_6195_begin_0 = const()[name = tensor<string, []>("op_6195_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6195_end_0 = const()[name = tensor<string, []>("op_6195_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6195_end_mask_0 = const()[name = tensor<string, []>("op_6195_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6195_cast_fp16 = slice_by_index(begin = var_6195_begin_0, end = var_6195_end_0, end_mask = var_6195_end_mask_0, x = var_5984_cast_fp16)[name = tensor<string, []>("op_6195_cast_fp16")];
+            tensor<int32, [4]> var_6202_begin_0 = const()[name = tensor<string, []>("op_6202_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6202_end_0 = const()[name = tensor<string, []>("op_6202_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6202_end_mask_0 = const()[name = tensor<string, []>("op_6202_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6202_cast_fp16 = slice_by_index(begin = var_6202_begin_0, end = var_6202_end_0, end_mask = var_6202_end_mask_0, x = var_5984_cast_fp16)[name = tensor<string, []>("op_6202_cast_fp16")];
+            tensor<int32, [4]> var_6209_begin_0 = const()[name = tensor<string, []>("op_6209_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6209_end_0 = const()[name = tensor<string, []>("op_6209_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6209_end_mask_0 = const()[name = tensor<string, []>("op_6209_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6209_cast_fp16 = slice_by_index(begin = var_6209_begin_0, end = var_6209_end_0, end_mask = var_6209_end_mask_0, x = var_5988_cast_fp16)[name = tensor<string, []>("op_6209_cast_fp16")];
+            tensor<int32, [4]> var_6216_begin_0 = const()[name = tensor<string, []>("op_6216_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6216_end_0 = const()[name = tensor<string, []>("op_6216_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6216_end_mask_0 = const()[name = tensor<string, []>("op_6216_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6216_cast_fp16 = slice_by_index(begin = var_6216_begin_0, end = var_6216_end_0, end_mask = var_6216_end_mask_0, x = var_5988_cast_fp16)[name = tensor<string, []>("op_6216_cast_fp16")];
+            tensor<int32, [4]> var_6223_begin_0 = const()[name = tensor<string, []>("op_6223_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6223_end_0 = const()[name = tensor<string, []>("op_6223_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6223_end_mask_0 = const()[name = tensor<string, []>("op_6223_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6223_cast_fp16 = slice_by_index(begin = var_6223_begin_0, end = var_6223_end_0, end_mask = var_6223_end_mask_0, x = var_5988_cast_fp16)[name = tensor<string, []>("op_6223_cast_fp16")];
+            tensor<int32, [4]> var_6230_begin_0 = const()[name = tensor<string, []>("op_6230_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6230_end_0 = const()[name = tensor<string, []>("op_6230_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6230_end_mask_0 = const()[name = tensor<string, []>("op_6230_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6230_cast_fp16 = slice_by_index(begin = var_6230_begin_0, end = var_6230_end_0, end_mask = var_6230_end_mask_0, x = var_5988_cast_fp16)[name = tensor<string, []>("op_6230_cast_fp16")];
+            tensor<int32, [4]> var_6237_begin_0 = const()[name = tensor<string, []>("op_6237_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6237_end_0 = const()[name = tensor<string, []>("op_6237_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6237_end_mask_0 = const()[name = tensor<string, []>("op_6237_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6237_cast_fp16 = slice_by_index(begin = var_6237_begin_0, end = var_6237_end_0, end_mask = var_6237_end_mask_0, x = var_5992_cast_fp16)[name = tensor<string, []>("op_6237_cast_fp16")];
+            tensor<int32, [4]> var_6244_begin_0 = const()[name = tensor<string, []>("op_6244_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6244_end_0 = const()[name = tensor<string, []>("op_6244_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6244_end_mask_0 = const()[name = tensor<string, []>("op_6244_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6244_cast_fp16 = slice_by_index(begin = var_6244_begin_0, end = var_6244_end_0, end_mask = var_6244_end_mask_0, x = var_5992_cast_fp16)[name = tensor<string, []>("op_6244_cast_fp16")];
+            tensor<int32, [4]> var_6251_begin_0 = const()[name = tensor<string, []>("op_6251_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6251_end_0 = const()[name = tensor<string, []>("op_6251_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6251_end_mask_0 = const()[name = tensor<string, []>("op_6251_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6251_cast_fp16 = slice_by_index(begin = var_6251_begin_0, end = var_6251_end_0, end_mask = var_6251_end_mask_0, x = var_5992_cast_fp16)[name = tensor<string, []>("op_6251_cast_fp16")];
+            tensor<int32, [4]> var_6258_begin_0 = const()[name = tensor<string, []>("op_6258_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6258_end_0 = const()[name = tensor<string, []>("op_6258_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6258_end_mask_0 = const()[name = tensor<string, []>("op_6258_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6258_cast_fp16 = slice_by_index(begin = var_6258_begin_0, end = var_6258_end_0, end_mask = var_6258_end_mask_0, x = var_5992_cast_fp16)[name = tensor<string, []>("op_6258_cast_fp16")];
+            tensor<int32, [4]> var_6265_begin_0 = const()[name = tensor<string, []>("op_6265_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6265_end_0 = const()[name = tensor<string, []>("op_6265_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6265_end_mask_0 = const()[name = tensor<string, []>("op_6265_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6265_cast_fp16 = slice_by_index(begin = var_6265_begin_0, end = var_6265_end_0, end_mask = var_6265_end_mask_0, x = var_5996_cast_fp16)[name = tensor<string, []>("op_6265_cast_fp16")];
+            tensor<int32, [4]> var_6272_begin_0 = const()[name = tensor<string, []>("op_6272_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6272_end_0 = const()[name = tensor<string, []>("op_6272_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6272_end_mask_0 = const()[name = tensor<string, []>("op_6272_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6272_cast_fp16 = slice_by_index(begin = var_6272_begin_0, end = var_6272_end_0, end_mask = var_6272_end_mask_0, x = var_5996_cast_fp16)[name = tensor<string, []>("op_6272_cast_fp16")];
+            tensor<int32, [4]> var_6279_begin_0 = const()[name = tensor<string, []>("op_6279_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6279_end_0 = const()[name = tensor<string, []>("op_6279_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6279_end_mask_0 = const()[name = tensor<string, []>("op_6279_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6279_cast_fp16 = slice_by_index(begin = var_6279_begin_0, end = var_6279_end_0, end_mask = var_6279_end_mask_0, x = var_5996_cast_fp16)[name = tensor<string, []>("op_6279_cast_fp16")];
+            tensor<int32, [4]> var_6286_begin_0 = const()[name = tensor<string, []>("op_6286_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6286_end_0 = const()[name = tensor<string, []>("op_6286_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6286_end_mask_0 = const()[name = tensor<string, []>("op_6286_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6286_cast_fp16 = slice_by_index(begin = var_6286_begin_0, end = var_6286_end_0, end_mask = var_6286_end_mask_0, x = var_5996_cast_fp16)[name = tensor<string, []>("op_6286_cast_fp16")];
+            tensor<int32, [4]> var_6293_begin_0 = const()[name = tensor<string, []>("op_6293_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6293_end_0 = const()[name = tensor<string, []>("op_6293_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6293_end_mask_0 = const()[name = tensor<string, []>("op_6293_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6293_cast_fp16 = slice_by_index(begin = var_6293_begin_0, end = var_6293_end_0, end_mask = var_6293_end_mask_0, x = var_6000_cast_fp16)[name = tensor<string, []>("op_6293_cast_fp16")];
+            tensor<int32, [4]> var_6300_begin_0 = const()[name = tensor<string, []>("op_6300_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6300_end_0 = const()[name = tensor<string, []>("op_6300_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6300_end_mask_0 = const()[name = tensor<string, []>("op_6300_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6300_cast_fp16 = slice_by_index(begin = var_6300_begin_0, end = var_6300_end_0, end_mask = var_6300_end_mask_0, x = var_6000_cast_fp16)[name = tensor<string, []>("op_6300_cast_fp16")];
+            tensor<int32, [4]> var_6307_begin_0 = const()[name = tensor<string, []>("op_6307_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6307_end_0 = const()[name = tensor<string, []>("op_6307_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6307_end_mask_0 = const()[name = tensor<string, []>("op_6307_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6307_cast_fp16 = slice_by_index(begin = var_6307_begin_0, end = var_6307_end_0, end_mask = var_6307_end_mask_0, x = var_6000_cast_fp16)[name = tensor<string, []>("op_6307_cast_fp16")];
+            tensor<int32, [4]> var_6314_begin_0 = const()[name = tensor<string, []>("op_6314_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6314_end_0 = const()[name = tensor<string, []>("op_6314_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6314_end_mask_0 = const()[name = tensor<string, []>("op_6314_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6314_cast_fp16 = slice_by_index(begin = var_6314_begin_0, end = var_6314_end_0, end_mask = var_6314_end_mask_0, x = var_6000_cast_fp16)[name = tensor<string, []>("op_6314_cast_fp16")];
+            tensor<int32, [4]> var_6321_begin_0 = const()[name = tensor<string, []>("op_6321_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6321_end_0 = const()[name = tensor<string, []>("op_6321_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6321_end_mask_0 = const()[name = tensor<string, []>("op_6321_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6321_cast_fp16 = slice_by_index(begin = var_6321_begin_0, end = var_6321_end_0, end_mask = var_6321_end_mask_0, x = var_6004_cast_fp16)[name = tensor<string, []>("op_6321_cast_fp16")];
+            tensor<int32, [4]> var_6328_begin_0 = const()[name = tensor<string, []>("op_6328_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6328_end_0 = const()[name = tensor<string, []>("op_6328_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6328_end_mask_0 = const()[name = tensor<string, []>("op_6328_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6328_cast_fp16 = slice_by_index(begin = var_6328_begin_0, end = var_6328_end_0, end_mask = var_6328_end_mask_0, x = var_6004_cast_fp16)[name = tensor<string, []>("op_6328_cast_fp16")];
+            tensor<int32, [4]> var_6335_begin_0 = const()[name = tensor<string, []>("op_6335_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6335_end_0 = const()[name = tensor<string, []>("op_6335_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6335_end_mask_0 = const()[name = tensor<string, []>("op_6335_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6335_cast_fp16 = slice_by_index(begin = var_6335_begin_0, end = var_6335_end_0, end_mask = var_6335_end_mask_0, x = var_6004_cast_fp16)[name = tensor<string, []>("op_6335_cast_fp16")];
+            tensor<int32, [4]> var_6342_begin_0 = const()[name = tensor<string, []>("op_6342_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6342_end_0 = const()[name = tensor<string, []>("op_6342_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6342_end_mask_0 = const()[name = tensor<string, []>("op_6342_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6342_cast_fp16 = slice_by_index(begin = var_6342_begin_0, end = var_6342_end_0, end_mask = var_6342_end_mask_0, x = var_6004_cast_fp16)[name = tensor<string, []>("op_6342_cast_fp16")];
+            tensor<int32, [4]> k_13_perm_0 = const()[name = tensor<string, []>("k_13_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_6347_begin_0 = const()[name = tensor<string, []>("op_6347_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6347_end_0 = const()[name = tensor<string, []>("op_6347_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_6347_end_mask_0 = const()[name = tensor<string, []>("op_6347_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_5 = transpose(perm = k_13_perm_0, x = key_13_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6347_cast_fp16 = slice_by_index(begin = var_6347_begin_0, end = var_6347_end_0, end_mask = var_6347_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6347_cast_fp16")];
+            tensor<int32, [4]> var_6351_begin_0 = const()[name = tensor<string, []>("op_6351_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_6351_end_0 = const()[name = tensor<string, []>("op_6351_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_6351_end_mask_0 = const()[name = tensor<string, []>("op_6351_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6351_cast_fp16 = slice_by_index(begin = var_6351_begin_0, end = var_6351_end_0, end_mask = var_6351_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6351_cast_fp16")];
+            tensor<int32, [4]> var_6355_begin_0 = const()[name = tensor<string, []>("op_6355_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_6355_end_0 = const()[name = tensor<string, []>("op_6355_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_6355_end_mask_0 = const()[name = tensor<string, []>("op_6355_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6355_cast_fp16 = slice_by_index(begin = var_6355_begin_0, end = var_6355_end_0, end_mask = var_6355_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6355_cast_fp16")];
+            tensor<int32, [4]> var_6359_begin_0 = const()[name = tensor<string, []>("op_6359_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_6359_end_0 = const()[name = tensor<string, []>("op_6359_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_6359_end_mask_0 = const()[name = tensor<string, []>("op_6359_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6359_cast_fp16 = slice_by_index(begin = var_6359_begin_0, end = var_6359_end_0, end_mask = var_6359_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6359_cast_fp16")];
+            tensor<int32, [4]> var_6363_begin_0 = const()[name = tensor<string, []>("op_6363_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_6363_end_0 = const()[name = tensor<string, []>("op_6363_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_6363_end_mask_0 = const()[name = tensor<string, []>("op_6363_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6363_cast_fp16 = slice_by_index(begin = var_6363_begin_0, end = var_6363_end_0, end_mask = var_6363_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6363_cast_fp16")];
+            tensor<int32, [4]> var_6367_begin_0 = const()[name = tensor<string, []>("op_6367_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_6367_end_0 = const()[name = tensor<string, []>("op_6367_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_6367_end_mask_0 = const()[name = tensor<string, []>("op_6367_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6367_cast_fp16 = slice_by_index(begin = var_6367_begin_0, end = var_6367_end_0, end_mask = var_6367_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6367_cast_fp16")];
+            tensor<int32, [4]> var_6371_begin_0 = const()[name = tensor<string, []>("op_6371_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_6371_end_0 = const()[name = tensor<string, []>("op_6371_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_6371_end_mask_0 = const()[name = tensor<string, []>("op_6371_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6371_cast_fp16 = slice_by_index(begin = var_6371_begin_0, end = var_6371_end_0, end_mask = var_6371_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6371_cast_fp16")];
+            tensor<int32, [4]> var_6375_begin_0 = const()[name = tensor<string, []>("op_6375_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_6375_end_0 = const()[name = tensor<string, []>("op_6375_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_6375_end_mask_0 = const()[name = tensor<string, []>("op_6375_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6375_cast_fp16 = slice_by_index(begin = var_6375_begin_0, end = var_6375_end_0, end_mask = var_6375_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6375_cast_fp16")];
+            tensor<int32, [4]> var_6379_begin_0 = const()[name = tensor<string, []>("op_6379_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_6379_end_0 = const()[name = tensor<string, []>("op_6379_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_6379_end_mask_0 = const()[name = tensor<string, []>("op_6379_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6379_cast_fp16 = slice_by_index(begin = var_6379_begin_0, end = var_6379_end_0, end_mask = var_6379_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6379_cast_fp16")];
+            tensor<int32, [4]> var_6383_begin_0 = const()[name = tensor<string, []>("op_6383_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_6383_end_0 = const()[name = tensor<string, []>("op_6383_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_6383_end_mask_0 = const()[name = tensor<string, []>("op_6383_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6383_cast_fp16 = slice_by_index(begin = var_6383_begin_0, end = var_6383_end_0, end_mask = var_6383_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6383_cast_fp16")];
+            tensor<int32, [4]> var_6387_begin_0 = const()[name = tensor<string, []>("op_6387_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_6387_end_0 = const()[name = tensor<string, []>("op_6387_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_6387_end_mask_0 = const()[name = tensor<string, []>("op_6387_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6387_cast_fp16 = slice_by_index(begin = var_6387_begin_0, end = var_6387_end_0, end_mask = var_6387_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6387_cast_fp16")];
+            tensor<int32, [4]> var_6391_begin_0 = const()[name = tensor<string, []>("op_6391_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_6391_end_0 = const()[name = tensor<string, []>("op_6391_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_6391_end_mask_0 = const()[name = tensor<string, []>("op_6391_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_6391_cast_fp16 = slice_by_index(begin = var_6391_begin_0, end = var_6391_end_0, end_mask = var_6391_end_mask_0, x = transpose_5)[name = tensor<string, []>("op_6391_cast_fp16")];
+            tensor<int32, [4]> var_6393_begin_0 = const()[name = tensor<string, []>("op_6393_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6393_end_0 = const()[name = tensor<string, []>("op_6393_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6393_end_mask_0 = const()[name = tensor<string, []>("op_6393_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6393_cast_fp16 = slice_by_index(begin = var_6393_begin_0, end = var_6393_end_0, end_mask = var_6393_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6393_cast_fp16")];
+            tensor<int32, [4]> var_6397_begin_0 = const()[name = tensor<string, []>("op_6397_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_6397_end_0 = const()[name = tensor<string, []>("op_6397_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_6397_end_mask_0 = const()[name = tensor<string, []>("op_6397_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6397_cast_fp16 = slice_by_index(begin = var_6397_begin_0, end = var_6397_end_0, end_mask = var_6397_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6397_cast_fp16")];
+            tensor<int32, [4]> var_6401_begin_0 = const()[name = tensor<string, []>("op_6401_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_6401_end_0 = const()[name = tensor<string, []>("op_6401_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_6401_end_mask_0 = const()[name = tensor<string, []>("op_6401_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6401_cast_fp16 = slice_by_index(begin = var_6401_begin_0, end = var_6401_end_0, end_mask = var_6401_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6401_cast_fp16")];
+            tensor<int32, [4]> var_6405_begin_0 = const()[name = tensor<string, []>("op_6405_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_6405_end_0 = const()[name = tensor<string, []>("op_6405_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_6405_end_mask_0 = const()[name = tensor<string, []>("op_6405_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6405_cast_fp16 = slice_by_index(begin = var_6405_begin_0, end = var_6405_end_0, end_mask = var_6405_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6405_cast_fp16")];
+            tensor<int32, [4]> var_6409_begin_0 = const()[name = tensor<string, []>("op_6409_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_6409_end_0 = const()[name = tensor<string, []>("op_6409_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_6409_end_mask_0 = const()[name = tensor<string, []>("op_6409_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6409_cast_fp16 = slice_by_index(begin = var_6409_begin_0, end = var_6409_end_0, end_mask = var_6409_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6409_cast_fp16")];
+            tensor<int32, [4]> var_6413_begin_0 = const()[name = tensor<string, []>("op_6413_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_6413_end_0 = const()[name = tensor<string, []>("op_6413_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_6413_end_mask_0 = const()[name = tensor<string, []>("op_6413_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6413_cast_fp16 = slice_by_index(begin = var_6413_begin_0, end = var_6413_end_0, end_mask = var_6413_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6413_cast_fp16")];
+            tensor<int32, [4]> var_6417_begin_0 = const()[name = tensor<string, []>("op_6417_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_6417_end_0 = const()[name = tensor<string, []>("op_6417_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_6417_end_mask_0 = const()[name = tensor<string, []>("op_6417_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6417_cast_fp16 = slice_by_index(begin = var_6417_begin_0, end = var_6417_end_0, end_mask = var_6417_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6417_cast_fp16")];
+            tensor<int32, [4]> var_6421_begin_0 = const()[name = tensor<string, []>("op_6421_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_6421_end_0 = const()[name = tensor<string, []>("op_6421_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_6421_end_mask_0 = const()[name = tensor<string, []>("op_6421_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6421_cast_fp16 = slice_by_index(begin = var_6421_begin_0, end = var_6421_end_0, end_mask = var_6421_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6421_cast_fp16")];
+            tensor<int32, [4]> var_6425_begin_0 = const()[name = tensor<string, []>("op_6425_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_6425_end_0 = const()[name = tensor<string, []>("op_6425_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_6425_end_mask_0 = const()[name = tensor<string, []>("op_6425_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6425_cast_fp16 = slice_by_index(begin = var_6425_begin_0, end = var_6425_end_0, end_mask = var_6425_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6425_cast_fp16")];
+            tensor<int32, [4]> var_6429_begin_0 = const()[name = tensor<string, []>("op_6429_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_6429_end_0 = const()[name = tensor<string, []>("op_6429_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_6429_end_mask_0 = const()[name = tensor<string, []>("op_6429_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6429_cast_fp16 = slice_by_index(begin = var_6429_begin_0, end = var_6429_end_0, end_mask = var_6429_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6429_cast_fp16")];
+            tensor<int32, [4]> var_6433_begin_0 = const()[name = tensor<string, []>("op_6433_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_6433_end_0 = const()[name = tensor<string, []>("op_6433_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_6433_end_mask_0 = const()[name = tensor<string, []>("op_6433_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6433_cast_fp16 = slice_by_index(begin = var_6433_begin_0, end = var_6433_end_0, end_mask = var_6433_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6433_cast_fp16")];
+            tensor<int32, [4]> var_6437_begin_0 = const()[name = tensor<string, []>("op_6437_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_6437_end_0 = const()[name = tensor<string, []>("op_6437_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_6437_end_mask_0 = const()[name = tensor<string, []>("op_6437_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6437_cast_fp16 = slice_by_index(begin = var_6437_begin_0, end = var_6437_end_0, end_mask = var_6437_end_mask_0, x = value_13_cast_fp16)[name = tensor<string, []>("op_6437_cast_fp16")];
+            tensor<string, []> var_6441_equation_0 = const()[name = tensor<string, []>("op_6441_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6441_cast_fp16 = einsum(equation = var_6441_equation_0, values = (var_6347_cast_fp16, var_6013_cast_fp16))[name = tensor<string, []>("op_6441_cast_fp16")];
+            tensor<fp16, []> var_6442_to_fp16 = const()[name = tensor<string, []>("op_6442_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_577_cast_fp16 = mul(x = var_6441_cast_fp16, y = var_6442_to_fp16)[name = tensor<string, []>("aw_chunk_577_cast_fp16")];
+            tensor<string, []> var_6445_equation_0 = const()[name = tensor<string, []>("op_6445_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6445_cast_fp16 = einsum(equation = var_6445_equation_0, values = (var_6347_cast_fp16, var_6020_cast_fp16))[name = tensor<string, []>("op_6445_cast_fp16")];
+            tensor<fp16, []> var_6446_to_fp16 = const()[name = tensor<string, []>("op_6446_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_579_cast_fp16 = mul(x = var_6445_cast_fp16, y = var_6446_to_fp16)[name = tensor<string, []>("aw_chunk_579_cast_fp16")];
+            tensor<string, []> var_6449_equation_0 = const()[name = tensor<string, []>("op_6449_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6449_cast_fp16 = einsum(equation = var_6449_equation_0, values = (var_6347_cast_fp16, var_6027_cast_fp16))[name = tensor<string, []>("op_6449_cast_fp16")];
+            tensor<fp16, []> var_6450_to_fp16 = const()[name = tensor<string, []>("op_6450_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_581_cast_fp16 = mul(x = var_6449_cast_fp16, y = var_6450_to_fp16)[name = tensor<string, []>("aw_chunk_581_cast_fp16")];
+            tensor<string, []> var_6453_equation_0 = const()[name = tensor<string, []>("op_6453_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6453_cast_fp16 = einsum(equation = var_6453_equation_0, values = (var_6347_cast_fp16, var_6034_cast_fp16))[name = tensor<string, []>("op_6453_cast_fp16")];
+            tensor<fp16, []> var_6454_to_fp16 = const()[name = tensor<string, []>("op_6454_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_583_cast_fp16 = mul(x = var_6453_cast_fp16, y = var_6454_to_fp16)[name = tensor<string, []>("aw_chunk_583_cast_fp16")];
+            tensor<string, []> var_6457_equation_0 = const()[name = tensor<string, []>("op_6457_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6457_cast_fp16 = einsum(equation = var_6457_equation_0, values = (var_6351_cast_fp16, var_6041_cast_fp16))[name = tensor<string, []>("op_6457_cast_fp16")];
+            tensor<fp16, []> var_6458_to_fp16 = const()[name = tensor<string, []>("op_6458_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_585_cast_fp16 = mul(x = var_6457_cast_fp16, y = var_6458_to_fp16)[name = tensor<string, []>("aw_chunk_585_cast_fp16")];
+            tensor<string, []> var_6461_equation_0 = const()[name = tensor<string, []>("op_6461_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6461_cast_fp16 = einsum(equation = var_6461_equation_0, values = (var_6351_cast_fp16, var_6048_cast_fp16))[name = tensor<string, []>("op_6461_cast_fp16")];
+            tensor<fp16, []> var_6462_to_fp16 = const()[name = tensor<string, []>("op_6462_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_587_cast_fp16 = mul(x = var_6461_cast_fp16, y = var_6462_to_fp16)[name = tensor<string, []>("aw_chunk_587_cast_fp16")];
+            tensor<string, []> var_6465_equation_0 = const()[name = tensor<string, []>("op_6465_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6465_cast_fp16 = einsum(equation = var_6465_equation_0, values = (var_6351_cast_fp16, var_6055_cast_fp16))[name = tensor<string, []>("op_6465_cast_fp16")];
+            tensor<fp16, []> var_6466_to_fp16 = const()[name = tensor<string, []>("op_6466_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_589_cast_fp16 = mul(x = var_6465_cast_fp16, y = var_6466_to_fp16)[name = tensor<string, []>("aw_chunk_589_cast_fp16")];
+            tensor<string, []> var_6469_equation_0 = const()[name = tensor<string, []>("op_6469_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6469_cast_fp16 = einsum(equation = var_6469_equation_0, values = (var_6351_cast_fp16, var_6062_cast_fp16))[name = tensor<string, []>("op_6469_cast_fp16")];
+            tensor<fp16, []> var_6470_to_fp16 = const()[name = tensor<string, []>("op_6470_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_591_cast_fp16 = mul(x = var_6469_cast_fp16, y = var_6470_to_fp16)[name = tensor<string, []>("aw_chunk_591_cast_fp16")];
+            tensor<string, []> var_6473_equation_0 = const()[name = tensor<string, []>("op_6473_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6473_cast_fp16 = einsum(equation = var_6473_equation_0, values = (var_6355_cast_fp16, var_6069_cast_fp16))[name = tensor<string, []>("op_6473_cast_fp16")];
+            tensor<fp16, []> var_6474_to_fp16 = const()[name = tensor<string, []>("op_6474_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_593_cast_fp16 = mul(x = var_6473_cast_fp16, y = var_6474_to_fp16)[name = tensor<string, []>("aw_chunk_593_cast_fp16")];
+            tensor<string, []> var_6477_equation_0 = const()[name = tensor<string, []>("op_6477_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6477_cast_fp16 = einsum(equation = var_6477_equation_0, values = (var_6355_cast_fp16, var_6076_cast_fp16))[name = tensor<string, []>("op_6477_cast_fp16")];
+            tensor<fp16, []> var_6478_to_fp16 = const()[name = tensor<string, []>("op_6478_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_595_cast_fp16 = mul(x = var_6477_cast_fp16, y = var_6478_to_fp16)[name = tensor<string, []>("aw_chunk_595_cast_fp16")];
+            tensor<string, []> var_6481_equation_0 = const()[name = tensor<string, []>("op_6481_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6481_cast_fp16 = einsum(equation = var_6481_equation_0, values = (var_6355_cast_fp16, var_6083_cast_fp16))[name = tensor<string, []>("op_6481_cast_fp16")];
+            tensor<fp16, []> var_6482_to_fp16 = const()[name = tensor<string, []>("op_6482_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_597_cast_fp16 = mul(x = var_6481_cast_fp16, y = var_6482_to_fp16)[name = tensor<string, []>("aw_chunk_597_cast_fp16")];
+            tensor<string, []> var_6485_equation_0 = const()[name = tensor<string, []>("op_6485_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6485_cast_fp16 = einsum(equation = var_6485_equation_0, values = (var_6355_cast_fp16, var_6090_cast_fp16))[name = tensor<string, []>("op_6485_cast_fp16")];
+            tensor<fp16, []> var_6486_to_fp16 = const()[name = tensor<string, []>("op_6486_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_599_cast_fp16 = mul(x = var_6485_cast_fp16, y = var_6486_to_fp16)[name = tensor<string, []>("aw_chunk_599_cast_fp16")];
+            tensor<string, []> var_6489_equation_0 = const()[name = tensor<string, []>("op_6489_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6489_cast_fp16 = einsum(equation = var_6489_equation_0, values = (var_6359_cast_fp16, var_6097_cast_fp16))[name = tensor<string, []>("op_6489_cast_fp16")];
+            tensor<fp16, []> var_6490_to_fp16 = const()[name = tensor<string, []>("op_6490_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_601_cast_fp16 = mul(x = var_6489_cast_fp16, y = var_6490_to_fp16)[name = tensor<string, []>("aw_chunk_601_cast_fp16")];
+            tensor<string, []> var_6493_equation_0 = const()[name = tensor<string, []>("op_6493_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6493_cast_fp16 = einsum(equation = var_6493_equation_0, values = (var_6359_cast_fp16, var_6104_cast_fp16))[name = tensor<string, []>("op_6493_cast_fp16")];
+            tensor<fp16, []> var_6494_to_fp16 = const()[name = tensor<string, []>("op_6494_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_603_cast_fp16 = mul(x = var_6493_cast_fp16, y = var_6494_to_fp16)[name = tensor<string, []>("aw_chunk_603_cast_fp16")];
+            tensor<string, []> var_6497_equation_0 = const()[name = tensor<string, []>("op_6497_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6497_cast_fp16 = einsum(equation = var_6497_equation_0, values = (var_6359_cast_fp16, var_6111_cast_fp16))[name = tensor<string, []>("op_6497_cast_fp16")];
+            tensor<fp16, []> var_6498_to_fp16 = const()[name = tensor<string, []>("op_6498_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_605_cast_fp16 = mul(x = var_6497_cast_fp16, y = var_6498_to_fp16)[name = tensor<string, []>("aw_chunk_605_cast_fp16")];
+            tensor<string, []> var_6501_equation_0 = const()[name = tensor<string, []>("op_6501_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6501_cast_fp16 = einsum(equation = var_6501_equation_0, values = (var_6359_cast_fp16, var_6118_cast_fp16))[name = tensor<string, []>("op_6501_cast_fp16")];
+            tensor<fp16, []> var_6502_to_fp16 = const()[name = tensor<string, []>("op_6502_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_607_cast_fp16 = mul(x = var_6501_cast_fp16, y = var_6502_to_fp16)[name = tensor<string, []>("aw_chunk_607_cast_fp16")];
+            tensor<string, []> var_6505_equation_0 = const()[name = tensor<string, []>("op_6505_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6505_cast_fp16 = einsum(equation = var_6505_equation_0, values = (var_6363_cast_fp16, var_6125_cast_fp16))[name = tensor<string, []>("op_6505_cast_fp16")];
+            tensor<fp16, []> var_6506_to_fp16 = const()[name = tensor<string, []>("op_6506_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_609_cast_fp16 = mul(x = var_6505_cast_fp16, y = var_6506_to_fp16)[name = tensor<string, []>("aw_chunk_609_cast_fp16")];
+            tensor<string, []> var_6509_equation_0 = const()[name = tensor<string, []>("op_6509_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6509_cast_fp16 = einsum(equation = var_6509_equation_0, values = (var_6363_cast_fp16, var_6132_cast_fp16))[name = tensor<string, []>("op_6509_cast_fp16")];
+            tensor<fp16, []> var_6510_to_fp16 = const()[name = tensor<string, []>("op_6510_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_611_cast_fp16 = mul(x = var_6509_cast_fp16, y = var_6510_to_fp16)[name = tensor<string, []>("aw_chunk_611_cast_fp16")];
+            tensor<string, []> var_6513_equation_0 = const()[name = tensor<string, []>("op_6513_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6513_cast_fp16 = einsum(equation = var_6513_equation_0, values = (var_6363_cast_fp16, var_6139_cast_fp16))[name = tensor<string, []>("op_6513_cast_fp16")];
+            tensor<fp16, []> var_6514_to_fp16 = const()[name = tensor<string, []>("op_6514_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_613_cast_fp16 = mul(x = var_6513_cast_fp16, y = var_6514_to_fp16)[name = tensor<string, []>("aw_chunk_613_cast_fp16")];
+            tensor<string, []> var_6517_equation_0 = const()[name = tensor<string, []>("op_6517_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6517_cast_fp16 = einsum(equation = var_6517_equation_0, values = (var_6363_cast_fp16, var_6146_cast_fp16))[name = tensor<string, []>("op_6517_cast_fp16")];
+            tensor<fp16, []> var_6518_to_fp16 = const()[name = tensor<string, []>("op_6518_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_615_cast_fp16 = mul(x = var_6517_cast_fp16, y = var_6518_to_fp16)[name = tensor<string, []>("aw_chunk_615_cast_fp16")];
+            tensor<string, []> var_6521_equation_0 = const()[name = tensor<string, []>("op_6521_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6521_cast_fp16 = einsum(equation = var_6521_equation_0, values = (var_6367_cast_fp16, var_6153_cast_fp16))[name = tensor<string, []>("op_6521_cast_fp16")];
+            tensor<fp16, []> var_6522_to_fp16 = const()[name = tensor<string, []>("op_6522_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_617_cast_fp16 = mul(x = var_6521_cast_fp16, y = var_6522_to_fp16)[name = tensor<string, []>("aw_chunk_617_cast_fp16")];
+            tensor<string, []> var_6525_equation_0 = const()[name = tensor<string, []>("op_6525_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6525_cast_fp16 = einsum(equation = var_6525_equation_0, values = (var_6367_cast_fp16, var_6160_cast_fp16))[name = tensor<string, []>("op_6525_cast_fp16")];
+            tensor<fp16, []> var_6526_to_fp16 = const()[name = tensor<string, []>("op_6526_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_619_cast_fp16 = mul(x = var_6525_cast_fp16, y = var_6526_to_fp16)[name = tensor<string, []>("aw_chunk_619_cast_fp16")];
+            tensor<string, []> var_6529_equation_0 = const()[name = tensor<string, []>("op_6529_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6529_cast_fp16 = einsum(equation = var_6529_equation_0, values = (var_6367_cast_fp16, var_6167_cast_fp16))[name = tensor<string, []>("op_6529_cast_fp16")];
+            tensor<fp16, []> var_6530_to_fp16 = const()[name = tensor<string, []>("op_6530_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_621_cast_fp16 = mul(x = var_6529_cast_fp16, y = var_6530_to_fp16)[name = tensor<string, []>("aw_chunk_621_cast_fp16")];
+            tensor<string, []> var_6533_equation_0 = const()[name = tensor<string, []>("op_6533_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6533_cast_fp16 = einsum(equation = var_6533_equation_0, values = (var_6367_cast_fp16, var_6174_cast_fp16))[name = tensor<string, []>("op_6533_cast_fp16")];
+            tensor<fp16, []> var_6534_to_fp16 = const()[name = tensor<string, []>("op_6534_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_623_cast_fp16 = mul(x = var_6533_cast_fp16, y = var_6534_to_fp16)[name = tensor<string, []>("aw_chunk_623_cast_fp16")];
+            tensor<string, []> var_6537_equation_0 = const()[name = tensor<string, []>("op_6537_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6537_cast_fp16 = einsum(equation = var_6537_equation_0, values = (var_6371_cast_fp16, var_6181_cast_fp16))[name = tensor<string, []>("op_6537_cast_fp16")];
+            tensor<fp16, []> var_6538_to_fp16 = const()[name = tensor<string, []>("op_6538_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_625_cast_fp16 = mul(x = var_6537_cast_fp16, y = var_6538_to_fp16)[name = tensor<string, []>("aw_chunk_625_cast_fp16")];
+            tensor<string, []> var_6541_equation_0 = const()[name = tensor<string, []>("op_6541_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6541_cast_fp16 = einsum(equation = var_6541_equation_0, values = (var_6371_cast_fp16, var_6188_cast_fp16))[name = tensor<string, []>("op_6541_cast_fp16")];
+            tensor<fp16, []> var_6542_to_fp16 = const()[name = tensor<string, []>("op_6542_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_627_cast_fp16 = mul(x = var_6541_cast_fp16, y = var_6542_to_fp16)[name = tensor<string, []>("aw_chunk_627_cast_fp16")];
+            tensor<string, []> var_6545_equation_0 = const()[name = tensor<string, []>("op_6545_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6545_cast_fp16 = einsum(equation = var_6545_equation_0, values = (var_6371_cast_fp16, var_6195_cast_fp16))[name = tensor<string, []>("op_6545_cast_fp16")];
+            tensor<fp16, []> var_6546_to_fp16 = const()[name = tensor<string, []>("op_6546_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_629_cast_fp16 = mul(x = var_6545_cast_fp16, y = var_6546_to_fp16)[name = tensor<string, []>("aw_chunk_629_cast_fp16")];
+            tensor<string, []> var_6549_equation_0 = const()[name = tensor<string, []>("op_6549_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6549_cast_fp16 = einsum(equation = var_6549_equation_0, values = (var_6371_cast_fp16, var_6202_cast_fp16))[name = tensor<string, []>("op_6549_cast_fp16")];
+            tensor<fp16, []> var_6550_to_fp16 = const()[name = tensor<string, []>("op_6550_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_631_cast_fp16 = mul(x = var_6549_cast_fp16, y = var_6550_to_fp16)[name = tensor<string, []>("aw_chunk_631_cast_fp16")];
+            tensor<string, []> var_6553_equation_0 = const()[name = tensor<string, []>("op_6553_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6553_cast_fp16 = einsum(equation = var_6553_equation_0, values = (var_6375_cast_fp16, var_6209_cast_fp16))[name = tensor<string, []>("op_6553_cast_fp16")];
+            tensor<fp16, []> var_6554_to_fp16 = const()[name = tensor<string, []>("op_6554_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_633_cast_fp16 = mul(x = var_6553_cast_fp16, y = var_6554_to_fp16)[name = tensor<string, []>("aw_chunk_633_cast_fp16")];
+            tensor<string, []> var_6557_equation_0 = const()[name = tensor<string, []>("op_6557_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6557_cast_fp16 = einsum(equation = var_6557_equation_0, values = (var_6375_cast_fp16, var_6216_cast_fp16))[name = tensor<string, []>("op_6557_cast_fp16")];
+            tensor<fp16, []> var_6558_to_fp16 = const()[name = tensor<string, []>("op_6558_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_635_cast_fp16 = mul(x = var_6557_cast_fp16, y = var_6558_to_fp16)[name = tensor<string, []>("aw_chunk_635_cast_fp16")];
+            tensor<string, []> var_6561_equation_0 = const()[name = tensor<string, []>("op_6561_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6561_cast_fp16 = einsum(equation = var_6561_equation_0, values = (var_6375_cast_fp16, var_6223_cast_fp16))[name = tensor<string, []>("op_6561_cast_fp16")];
+            tensor<fp16, []> var_6562_to_fp16 = const()[name = tensor<string, []>("op_6562_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_637_cast_fp16 = mul(x = var_6561_cast_fp16, y = var_6562_to_fp16)[name = tensor<string, []>("aw_chunk_637_cast_fp16")];
+            tensor<string, []> var_6565_equation_0 = const()[name = tensor<string, []>("op_6565_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6565_cast_fp16 = einsum(equation = var_6565_equation_0, values = (var_6375_cast_fp16, var_6230_cast_fp16))[name = tensor<string, []>("op_6565_cast_fp16")];
+            tensor<fp16, []> var_6566_to_fp16 = const()[name = tensor<string, []>("op_6566_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_639_cast_fp16 = mul(x = var_6565_cast_fp16, y = var_6566_to_fp16)[name = tensor<string, []>("aw_chunk_639_cast_fp16")];
+            tensor<string, []> var_6569_equation_0 = const()[name = tensor<string, []>("op_6569_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6569_cast_fp16 = einsum(equation = var_6569_equation_0, values = (var_6379_cast_fp16, var_6237_cast_fp16))[name = tensor<string, []>("op_6569_cast_fp16")];
+            tensor<fp16, []> var_6570_to_fp16 = const()[name = tensor<string, []>("op_6570_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_641_cast_fp16 = mul(x = var_6569_cast_fp16, y = var_6570_to_fp16)[name = tensor<string, []>("aw_chunk_641_cast_fp16")];
+            tensor<string, []> var_6573_equation_0 = const()[name = tensor<string, []>("op_6573_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6573_cast_fp16 = einsum(equation = var_6573_equation_0, values = (var_6379_cast_fp16, var_6244_cast_fp16))[name = tensor<string, []>("op_6573_cast_fp16")];
+            tensor<fp16, []> var_6574_to_fp16 = const()[name = tensor<string, []>("op_6574_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_643_cast_fp16 = mul(x = var_6573_cast_fp16, y = var_6574_to_fp16)[name = tensor<string, []>("aw_chunk_643_cast_fp16")];
+            tensor<string, []> var_6577_equation_0 = const()[name = tensor<string, []>("op_6577_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6577_cast_fp16 = einsum(equation = var_6577_equation_0, values = (var_6379_cast_fp16, var_6251_cast_fp16))[name = tensor<string, []>("op_6577_cast_fp16")];
+            tensor<fp16, []> var_6578_to_fp16 = const()[name = tensor<string, []>("op_6578_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_645_cast_fp16 = mul(x = var_6577_cast_fp16, y = var_6578_to_fp16)[name = tensor<string, []>("aw_chunk_645_cast_fp16")];
+            tensor<string, []> var_6581_equation_0 = const()[name = tensor<string, []>("op_6581_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6581_cast_fp16 = einsum(equation = var_6581_equation_0, values = (var_6379_cast_fp16, var_6258_cast_fp16))[name = tensor<string, []>("op_6581_cast_fp16")];
+            tensor<fp16, []> var_6582_to_fp16 = const()[name = tensor<string, []>("op_6582_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_647_cast_fp16 = mul(x = var_6581_cast_fp16, y = var_6582_to_fp16)[name = tensor<string, []>("aw_chunk_647_cast_fp16")];
+            tensor<string, []> var_6585_equation_0 = const()[name = tensor<string, []>("op_6585_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6585_cast_fp16 = einsum(equation = var_6585_equation_0, values = (var_6383_cast_fp16, var_6265_cast_fp16))[name = tensor<string, []>("op_6585_cast_fp16")];
+            tensor<fp16, []> var_6586_to_fp16 = const()[name = tensor<string, []>("op_6586_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_649_cast_fp16 = mul(x = var_6585_cast_fp16, y = var_6586_to_fp16)[name = tensor<string, []>("aw_chunk_649_cast_fp16")];
+            tensor<string, []> var_6589_equation_0 = const()[name = tensor<string, []>("op_6589_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6589_cast_fp16 = einsum(equation = var_6589_equation_0, values = (var_6383_cast_fp16, var_6272_cast_fp16))[name = tensor<string, []>("op_6589_cast_fp16")];
+            tensor<fp16, []> var_6590_to_fp16 = const()[name = tensor<string, []>("op_6590_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_651_cast_fp16 = mul(x = var_6589_cast_fp16, y = var_6590_to_fp16)[name = tensor<string, []>("aw_chunk_651_cast_fp16")];
+            tensor<string, []> var_6593_equation_0 = const()[name = tensor<string, []>("op_6593_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6593_cast_fp16 = einsum(equation = var_6593_equation_0, values = (var_6383_cast_fp16, var_6279_cast_fp16))[name = tensor<string, []>("op_6593_cast_fp16")];
+            tensor<fp16, []> var_6594_to_fp16 = const()[name = tensor<string, []>("op_6594_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_653_cast_fp16 = mul(x = var_6593_cast_fp16, y = var_6594_to_fp16)[name = tensor<string, []>("aw_chunk_653_cast_fp16")];
+            tensor<string, []> var_6597_equation_0 = const()[name = tensor<string, []>("op_6597_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6597_cast_fp16 = einsum(equation = var_6597_equation_0, values = (var_6383_cast_fp16, var_6286_cast_fp16))[name = tensor<string, []>("op_6597_cast_fp16")];
+            tensor<fp16, []> var_6598_to_fp16 = const()[name = tensor<string, []>("op_6598_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_655_cast_fp16 = mul(x = var_6597_cast_fp16, y = var_6598_to_fp16)[name = tensor<string, []>("aw_chunk_655_cast_fp16")];
+            tensor<string, []> var_6601_equation_0 = const()[name = tensor<string, []>("op_6601_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6601_cast_fp16 = einsum(equation = var_6601_equation_0, values = (var_6387_cast_fp16, var_6293_cast_fp16))[name = tensor<string, []>("op_6601_cast_fp16")];
+            tensor<fp16, []> var_6602_to_fp16 = const()[name = tensor<string, []>("op_6602_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_657_cast_fp16 = mul(x = var_6601_cast_fp16, y = var_6602_to_fp16)[name = tensor<string, []>("aw_chunk_657_cast_fp16")];
+            tensor<string, []> var_6605_equation_0 = const()[name = tensor<string, []>("op_6605_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6605_cast_fp16 = einsum(equation = var_6605_equation_0, values = (var_6387_cast_fp16, var_6300_cast_fp16))[name = tensor<string, []>("op_6605_cast_fp16")];
+            tensor<fp16, []> var_6606_to_fp16 = const()[name = tensor<string, []>("op_6606_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_659_cast_fp16 = mul(x = var_6605_cast_fp16, y = var_6606_to_fp16)[name = tensor<string, []>("aw_chunk_659_cast_fp16")];
+            tensor<string, []> var_6609_equation_0 = const()[name = tensor<string, []>("op_6609_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6609_cast_fp16 = einsum(equation = var_6609_equation_0, values = (var_6387_cast_fp16, var_6307_cast_fp16))[name = tensor<string, []>("op_6609_cast_fp16")];
+            tensor<fp16, []> var_6610_to_fp16 = const()[name = tensor<string, []>("op_6610_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_661_cast_fp16 = mul(x = var_6609_cast_fp16, y = var_6610_to_fp16)[name = tensor<string, []>("aw_chunk_661_cast_fp16")];
+            tensor<string, []> var_6613_equation_0 = const()[name = tensor<string, []>("op_6613_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6613_cast_fp16 = einsum(equation = var_6613_equation_0, values = (var_6387_cast_fp16, var_6314_cast_fp16))[name = tensor<string, []>("op_6613_cast_fp16")];
+            tensor<fp16, []> var_6614_to_fp16 = const()[name = tensor<string, []>("op_6614_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_663_cast_fp16 = mul(x = var_6613_cast_fp16, y = var_6614_to_fp16)[name = tensor<string, []>("aw_chunk_663_cast_fp16")];
+            tensor<string, []> var_6617_equation_0 = const()[name = tensor<string, []>("op_6617_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6617_cast_fp16 = einsum(equation = var_6617_equation_0, values = (var_6391_cast_fp16, var_6321_cast_fp16))[name = tensor<string, []>("op_6617_cast_fp16")];
+            tensor<fp16, []> var_6618_to_fp16 = const()[name = tensor<string, []>("op_6618_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_665_cast_fp16 = mul(x = var_6617_cast_fp16, y = var_6618_to_fp16)[name = tensor<string, []>("aw_chunk_665_cast_fp16")];
+            tensor<string, []> var_6621_equation_0 = const()[name = tensor<string, []>("op_6621_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6621_cast_fp16 = einsum(equation = var_6621_equation_0, values = (var_6391_cast_fp16, var_6328_cast_fp16))[name = tensor<string, []>("op_6621_cast_fp16")];
+            tensor<fp16, []> var_6622_to_fp16 = const()[name = tensor<string, []>("op_6622_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_667_cast_fp16 = mul(x = var_6621_cast_fp16, y = var_6622_to_fp16)[name = tensor<string, []>("aw_chunk_667_cast_fp16")];
+            tensor<string, []> var_6625_equation_0 = const()[name = tensor<string, []>("op_6625_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6625_cast_fp16 = einsum(equation = var_6625_equation_0, values = (var_6391_cast_fp16, var_6335_cast_fp16))[name = tensor<string, []>("op_6625_cast_fp16")];
+            tensor<fp16, []> var_6626_to_fp16 = const()[name = tensor<string, []>("op_6626_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_669_cast_fp16 = mul(x = var_6625_cast_fp16, y = var_6626_to_fp16)[name = tensor<string, []>("aw_chunk_669_cast_fp16")];
+            tensor<string, []> var_6629_equation_0 = const()[name = tensor<string, []>("op_6629_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6629_cast_fp16 = einsum(equation = var_6629_equation_0, values = (var_6391_cast_fp16, var_6342_cast_fp16))[name = tensor<string, []>("op_6629_cast_fp16")];
+            tensor<fp16, []> var_6630_to_fp16 = const()[name = tensor<string, []>("op_6630_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_671_cast_fp16 = mul(x = var_6629_cast_fp16, y = var_6630_to_fp16)[name = tensor<string, []>("aw_chunk_671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6632_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_577_cast_fp16)[name = tensor<string, []>("op_6632_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6633_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_579_cast_fp16)[name = tensor<string, []>("op_6633_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6634_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_581_cast_fp16)[name = tensor<string, []>("op_6634_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6635_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_583_cast_fp16)[name = tensor<string, []>("op_6635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6636_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_585_cast_fp16)[name = tensor<string, []>("op_6636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6637_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_587_cast_fp16)[name = tensor<string, []>("op_6637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6638_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_589_cast_fp16)[name = tensor<string, []>("op_6638_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6639_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_591_cast_fp16)[name = tensor<string, []>("op_6639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6640_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_593_cast_fp16)[name = tensor<string, []>("op_6640_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6641_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_595_cast_fp16)[name = tensor<string, []>("op_6641_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6642_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_597_cast_fp16)[name = tensor<string, []>("op_6642_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6643_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_599_cast_fp16)[name = tensor<string, []>("op_6643_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6644_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_601_cast_fp16)[name = tensor<string, []>("op_6644_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6645_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_603_cast_fp16)[name = tensor<string, []>("op_6645_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6646_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_605_cast_fp16)[name = tensor<string, []>("op_6646_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6647_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_607_cast_fp16)[name = tensor<string, []>("op_6647_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6648_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_609_cast_fp16)[name = tensor<string, []>("op_6648_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6649_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_611_cast_fp16)[name = tensor<string, []>("op_6649_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6650_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_613_cast_fp16)[name = tensor<string, []>("op_6650_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6651_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_615_cast_fp16)[name = tensor<string, []>("op_6651_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6652_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_617_cast_fp16)[name = tensor<string, []>("op_6652_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6653_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_619_cast_fp16)[name = tensor<string, []>("op_6653_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6654_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_621_cast_fp16)[name = tensor<string, []>("op_6654_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6655_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_623_cast_fp16)[name = tensor<string, []>("op_6655_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6656_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_625_cast_fp16)[name = tensor<string, []>("op_6656_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6657_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_627_cast_fp16)[name = tensor<string, []>("op_6657_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6658_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_629_cast_fp16)[name = tensor<string, []>("op_6658_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6659_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_631_cast_fp16)[name = tensor<string, []>("op_6659_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6660_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_633_cast_fp16)[name = tensor<string, []>("op_6660_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6661_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_635_cast_fp16)[name = tensor<string, []>("op_6661_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6662_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_637_cast_fp16)[name = tensor<string, []>("op_6662_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6663_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_639_cast_fp16)[name = tensor<string, []>("op_6663_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6664_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_641_cast_fp16)[name = tensor<string, []>("op_6664_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6665_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_643_cast_fp16)[name = tensor<string, []>("op_6665_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6666_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_645_cast_fp16)[name = tensor<string, []>("op_6666_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6667_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_647_cast_fp16)[name = tensor<string, []>("op_6667_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6668_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_649_cast_fp16)[name = tensor<string, []>("op_6668_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6669_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_651_cast_fp16)[name = tensor<string, []>("op_6669_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6670_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_653_cast_fp16)[name = tensor<string, []>("op_6670_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6671_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_655_cast_fp16)[name = tensor<string, []>("op_6671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6672_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_657_cast_fp16)[name = tensor<string, []>("op_6672_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6673_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_659_cast_fp16)[name = tensor<string, []>("op_6673_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6674_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_661_cast_fp16)[name = tensor<string, []>("op_6674_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6675_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_663_cast_fp16)[name = tensor<string, []>("op_6675_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6676_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_665_cast_fp16)[name = tensor<string, []>("op_6676_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6677_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_667_cast_fp16)[name = tensor<string, []>("op_6677_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6678_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_669_cast_fp16)[name = tensor<string, []>("op_6678_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_6679_cast_fp16 = softmax(axis = var_5905, x = aw_chunk_671_cast_fp16)[name = tensor<string, []>("op_6679_cast_fp16")];
+            tensor<string, []> var_6681_equation_0 = const()[name = tensor<string, []>("op_6681_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6681_cast_fp16 = einsum(equation = var_6681_equation_0, values = (var_6393_cast_fp16, var_6632_cast_fp16))[name = tensor<string, []>("op_6681_cast_fp16")];
+            tensor<string, []> var_6683_equation_0 = const()[name = tensor<string, []>("op_6683_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6683_cast_fp16 = einsum(equation = var_6683_equation_0, values = (var_6393_cast_fp16, var_6633_cast_fp16))[name = tensor<string, []>("op_6683_cast_fp16")];
+            tensor<string, []> var_6685_equation_0 = const()[name = tensor<string, []>("op_6685_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6685_cast_fp16 = einsum(equation = var_6685_equation_0, values = (var_6393_cast_fp16, var_6634_cast_fp16))[name = tensor<string, []>("op_6685_cast_fp16")];
+            tensor<string, []> var_6687_equation_0 = const()[name = tensor<string, []>("op_6687_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6687_cast_fp16 = einsum(equation = var_6687_equation_0, values = (var_6393_cast_fp16, var_6635_cast_fp16))[name = tensor<string, []>("op_6687_cast_fp16")];
+            tensor<string, []> var_6689_equation_0 = const()[name = tensor<string, []>("op_6689_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6689_cast_fp16 = einsum(equation = var_6689_equation_0, values = (var_6397_cast_fp16, var_6636_cast_fp16))[name = tensor<string, []>("op_6689_cast_fp16")];
+            tensor<string, []> var_6691_equation_0 = const()[name = tensor<string, []>("op_6691_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6691_cast_fp16 = einsum(equation = var_6691_equation_0, values = (var_6397_cast_fp16, var_6637_cast_fp16))[name = tensor<string, []>("op_6691_cast_fp16")];
+            tensor<string, []> var_6693_equation_0 = const()[name = tensor<string, []>("op_6693_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6693_cast_fp16 = einsum(equation = var_6693_equation_0, values = (var_6397_cast_fp16, var_6638_cast_fp16))[name = tensor<string, []>("op_6693_cast_fp16")];
+            tensor<string, []> var_6695_equation_0 = const()[name = tensor<string, []>("op_6695_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6695_cast_fp16 = einsum(equation = var_6695_equation_0, values = (var_6397_cast_fp16, var_6639_cast_fp16))[name = tensor<string, []>("op_6695_cast_fp16")];
+            tensor<string, []> var_6697_equation_0 = const()[name = tensor<string, []>("op_6697_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6697_cast_fp16 = einsum(equation = var_6697_equation_0, values = (var_6401_cast_fp16, var_6640_cast_fp16))[name = tensor<string, []>("op_6697_cast_fp16")];
+            tensor<string, []> var_6699_equation_0 = const()[name = tensor<string, []>("op_6699_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6699_cast_fp16 = einsum(equation = var_6699_equation_0, values = (var_6401_cast_fp16, var_6641_cast_fp16))[name = tensor<string, []>("op_6699_cast_fp16")];
+            tensor<string, []> var_6701_equation_0 = const()[name = tensor<string, []>("op_6701_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6701_cast_fp16 = einsum(equation = var_6701_equation_0, values = (var_6401_cast_fp16, var_6642_cast_fp16))[name = tensor<string, []>("op_6701_cast_fp16")];
+            tensor<string, []> var_6703_equation_0 = const()[name = tensor<string, []>("op_6703_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6703_cast_fp16 = einsum(equation = var_6703_equation_0, values = (var_6401_cast_fp16, var_6643_cast_fp16))[name = tensor<string, []>("op_6703_cast_fp16")];
+            tensor<string, []> var_6705_equation_0 = const()[name = tensor<string, []>("op_6705_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6705_cast_fp16 = einsum(equation = var_6705_equation_0, values = (var_6405_cast_fp16, var_6644_cast_fp16))[name = tensor<string, []>("op_6705_cast_fp16")];
+            tensor<string, []> var_6707_equation_0 = const()[name = tensor<string, []>("op_6707_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6707_cast_fp16 = einsum(equation = var_6707_equation_0, values = (var_6405_cast_fp16, var_6645_cast_fp16))[name = tensor<string, []>("op_6707_cast_fp16")];
+            tensor<string, []> var_6709_equation_0 = const()[name = tensor<string, []>("op_6709_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6709_cast_fp16 = einsum(equation = var_6709_equation_0, values = (var_6405_cast_fp16, var_6646_cast_fp16))[name = tensor<string, []>("op_6709_cast_fp16")];
+            tensor<string, []> var_6711_equation_0 = const()[name = tensor<string, []>("op_6711_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6711_cast_fp16 = einsum(equation = var_6711_equation_0, values = (var_6405_cast_fp16, var_6647_cast_fp16))[name = tensor<string, []>("op_6711_cast_fp16")];
+            tensor<string, []> var_6713_equation_0 = const()[name = tensor<string, []>("op_6713_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6713_cast_fp16 = einsum(equation = var_6713_equation_0, values = (var_6409_cast_fp16, var_6648_cast_fp16))[name = tensor<string, []>("op_6713_cast_fp16")];
+            tensor<string, []> var_6715_equation_0 = const()[name = tensor<string, []>("op_6715_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6715_cast_fp16 = einsum(equation = var_6715_equation_0, values = (var_6409_cast_fp16, var_6649_cast_fp16))[name = tensor<string, []>("op_6715_cast_fp16")];
+            tensor<string, []> var_6717_equation_0 = const()[name = tensor<string, []>("op_6717_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6717_cast_fp16 = einsum(equation = var_6717_equation_0, values = (var_6409_cast_fp16, var_6650_cast_fp16))[name = tensor<string, []>("op_6717_cast_fp16")];
+            tensor<string, []> var_6719_equation_0 = const()[name = tensor<string, []>("op_6719_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6719_cast_fp16 = einsum(equation = var_6719_equation_0, values = (var_6409_cast_fp16, var_6651_cast_fp16))[name = tensor<string, []>("op_6719_cast_fp16")];
+            tensor<string, []> var_6721_equation_0 = const()[name = tensor<string, []>("op_6721_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6721_cast_fp16 = einsum(equation = var_6721_equation_0, values = (var_6413_cast_fp16, var_6652_cast_fp16))[name = tensor<string, []>("op_6721_cast_fp16")];
+            tensor<string, []> var_6723_equation_0 = const()[name = tensor<string, []>("op_6723_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6723_cast_fp16 = einsum(equation = var_6723_equation_0, values = (var_6413_cast_fp16, var_6653_cast_fp16))[name = tensor<string, []>("op_6723_cast_fp16")];
+            tensor<string, []> var_6725_equation_0 = const()[name = tensor<string, []>("op_6725_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6725_cast_fp16 = einsum(equation = var_6725_equation_0, values = (var_6413_cast_fp16, var_6654_cast_fp16))[name = tensor<string, []>("op_6725_cast_fp16")];
+            tensor<string, []> var_6727_equation_0 = const()[name = tensor<string, []>("op_6727_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6727_cast_fp16 = einsum(equation = var_6727_equation_0, values = (var_6413_cast_fp16, var_6655_cast_fp16))[name = tensor<string, []>("op_6727_cast_fp16")];
+            tensor<string, []> var_6729_equation_0 = const()[name = tensor<string, []>("op_6729_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6729_cast_fp16 = einsum(equation = var_6729_equation_0, values = (var_6417_cast_fp16, var_6656_cast_fp16))[name = tensor<string, []>("op_6729_cast_fp16")];
+            tensor<string, []> var_6731_equation_0 = const()[name = tensor<string, []>("op_6731_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6731_cast_fp16 = einsum(equation = var_6731_equation_0, values = (var_6417_cast_fp16, var_6657_cast_fp16))[name = tensor<string, []>("op_6731_cast_fp16")];
+            tensor<string, []> var_6733_equation_0 = const()[name = tensor<string, []>("op_6733_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6733_cast_fp16 = einsum(equation = var_6733_equation_0, values = (var_6417_cast_fp16, var_6658_cast_fp16))[name = tensor<string, []>("op_6733_cast_fp16")];
+            tensor<string, []> var_6735_equation_0 = const()[name = tensor<string, []>("op_6735_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6735_cast_fp16 = einsum(equation = var_6735_equation_0, values = (var_6417_cast_fp16, var_6659_cast_fp16))[name = tensor<string, []>("op_6735_cast_fp16")];
+            tensor<string, []> var_6737_equation_0 = const()[name = tensor<string, []>("op_6737_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6737_cast_fp16 = einsum(equation = var_6737_equation_0, values = (var_6421_cast_fp16, var_6660_cast_fp16))[name = tensor<string, []>("op_6737_cast_fp16")];
+            tensor<string, []> var_6739_equation_0 = const()[name = tensor<string, []>("op_6739_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6739_cast_fp16 = einsum(equation = var_6739_equation_0, values = (var_6421_cast_fp16, var_6661_cast_fp16))[name = tensor<string, []>("op_6739_cast_fp16")];
+            tensor<string, []> var_6741_equation_0 = const()[name = tensor<string, []>("op_6741_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6741_cast_fp16 = einsum(equation = var_6741_equation_0, values = (var_6421_cast_fp16, var_6662_cast_fp16))[name = tensor<string, []>("op_6741_cast_fp16")];
+            tensor<string, []> var_6743_equation_0 = const()[name = tensor<string, []>("op_6743_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6743_cast_fp16 = einsum(equation = var_6743_equation_0, values = (var_6421_cast_fp16, var_6663_cast_fp16))[name = tensor<string, []>("op_6743_cast_fp16")];
+            tensor<string, []> var_6745_equation_0 = const()[name = tensor<string, []>("op_6745_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6745_cast_fp16 = einsum(equation = var_6745_equation_0, values = (var_6425_cast_fp16, var_6664_cast_fp16))[name = tensor<string, []>("op_6745_cast_fp16")];
+            tensor<string, []> var_6747_equation_0 = const()[name = tensor<string, []>("op_6747_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6747_cast_fp16 = einsum(equation = var_6747_equation_0, values = (var_6425_cast_fp16, var_6665_cast_fp16))[name = tensor<string, []>("op_6747_cast_fp16")];
+            tensor<string, []> var_6749_equation_0 = const()[name = tensor<string, []>("op_6749_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6749_cast_fp16 = einsum(equation = var_6749_equation_0, values = (var_6425_cast_fp16, var_6666_cast_fp16))[name = tensor<string, []>("op_6749_cast_fp16")];
+            tensor<string, []> var_6751_equation_0 = const()[name = tensor<string, []>("op_6751_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6751_cast_fp16 = einsum(equation = var_6751_equation_0, values = (var_6425_cast_fp16, var_6667_cast_fp16))[name = tensor<string, []>("op_6751_cast_fp16")];
+            tensor<string, []> var_6753_equation_0 = const()[name = tensor<string, []>("op_6753_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6753_cast_fp16 = einsum(equation = var_6753_equation_0, values = (var_6429_cast_fp16, var_6668_cast_fp16))[name = tensor<string, []>("op_6753_cast_fp16")];
+            tensor<string, []> var_6755_equation_0 = const()[name = tensor<string, []>("op_6755_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6755_cast_fp16 = einsum(equation = var_6755_equation_0, values = (var_6429_cast_fp16, var_6669_cast_fp16))[name = tensor<string, []>("op_6755_cast_fp16")];
+            tensor<string, []> var_6757_equation_0 = const()[name = tensor<string, []>("op_6757_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6757_cast_fp16 = einsum(equation = var_6757_equation_0, values = (var_6429_cast_fp16, var_6670_cast_fp16))[name = tensor<string, []>("op_6757_cast_fp16")];
+            tensor<string, []> var_6759_equation_0 = const()[name = tensor<string, []>("op_6759_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6759_cast_fp16 = einsum(equation = var_6759_equation_0, values = (var_6429_cast_fp16, var_6671_cast_fp16))[name = tensor<string, []>("op_6759_cast_fp16")];
+            tensor<string, []> var_6761_equation_0 = const()[name = tensor<string, []>("op_6761_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6761_cast_fp16 = einsum(equation = var_6761_equation_0, values = (var_6433_cast_fp16, var_6672_cast_fp16))[name = tensor<string, []>("op_6761_cast_fp16")];
+            tensor<string, []> var_6763_equation_0 = const()[name = tensor<string, []>("op_6763_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6763_cast_fp16 = einsum(equation = var_6763_equation_0, values = (var_6433_cast_fp16, var_6673_cast_fp16))[name = tensor<string, []>("op_6763_cast_fp16")];
+            tensor<string, []> var_6765_equation_0 = const()[name = tensor<string, []>("op_6765_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6765_cast_fp16 = einsum(equation = var_6765_equation_0, values = (var_6433_cast_fp16, var_6674_cast_fp16))[name = tensor<string, []>("op_6765_cast_fp16")];
+            tensor<string, []> var_6767_equation_0 = const()[name = tensor<string, []>("op_6767_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6767_cast_fp16 = einsum(equation = var_6767_equation_0, values = (var_6433_cast_fp16, var_6675_cast_fp16))[name = tensor<string, []>("op_6767_cast_fp16")];
+            tensor<string, []> var_6769_equation_0 = const()[name = tensor<string, []>("op_6769_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6769_cast_fp16 = einsum(equation = var_6769_equation_0, values = (var_6437_cast_fp16, var_6676_cast_fp16))[name = tensor<string, []>("op_6769_cast_fp16")];
+            tensor<string, []> var_6771_equation_0 = const()[name = tensor<string, []>("op_6771_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6771_cast_fp16 = einsum(equation = var_6771_equation_0, values = (var_6437_cast_fp16, var_6677_cast_fp16))[name = tensor<string, []>("op_6771_cast_fp16")];
+            tensor<string, []> var_6773_equation_0 = const()[name = tensor<string, []>("op_6773_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6773_cast_fp16 = einsum(equation = var_6773_equation_0, values = (var_6437_cast_fp16, var_6678_cast_fp16))[name = tensor<string, []>("op_6773_cast_fp16")];
+            tensor<string, []> var_6775_equation_0 = const()[name = tensor<string, []>("op_6775_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_6775_cast_fp16 = einsum(equation = var_6775_equation_0, values = (var_6437_cast_fp16, var_6679_cast_fp16))[name = tensor<string, []>("op_6775_cast_fp16")];
+            tensor<bool, []> var_6777_interleave_0 = const()[name = tensor<string, []>("op_6777_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6777_cast_fp16 = concat(axis = var_5888, interleave = var_6777_interleave_0, values = (var_6681_cast_fp16, var_6683_cast_fp16, var_6685_cast_fp16, var_6687_cast_fp16))[name = tensor<string, []>("op_6777_cast_fp16")];
+            tensor<bool, []> var_6779_interleave_0 = const()[name = tensor<string, []>("op_6779_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6779_cast_fp16 = concat(axis = var_5888, interleave = var_6779_interleave_0, values = (var_6689_cast_fp16, var_6691_cast_fp16, var_6693_cast_fp16, var_6695_cast_fp16))[name = tensor<string, []>("op_6779_cast_fp16")];
+            tensor<bool, []> var_6781_interleave_0 = const()[name = tensor<string, []>("op_6781_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6781_cast_fp16 = concat(axis = var_5888, interleave = var_6781_interleave_0, values = (var_6697_cast_fp16, var_6699_cast_fp16, var_6701_cast_fp16, var_6703_cast_fp16))[name = tensor<string, []>("op_6781_cast_fp16")];
+            tensor<bool, []> var_6783_interleave_0 = const()[name = tensor<string, []>("op_6783_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6783_cast_fp16 = concat(axis = var_5888, interleave = var_6783_interleave_0, values = (var_6705_cast_fp16, var_6707_cast_fp16, var_6709_cast_fp16, var_6711_cast_fp16))[name = tensor<string, []>("op_6783_cast_fp16")];
+            tensor<bool, []> var_6785_interleave_0 = const()[name = tensor<string, []>("op_6785_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6785_cast_fp16 = concat(axis = var_5888, interleave = var_6785_interleave_0, values = (var_6713_cast_fp16, var_6715_cast_fp16, var_6717_cast_fp16, var_6719_cast_fp16))[name = tensor<string, []>("op_6785_cast_fp16")];
+            tensor<bool, []> var_6787_interleave_0 = const()[name = tensor<string, []>("op_6787_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6787_cast_fp16 = concat(axis = var_5888, interleave = var_6787_interleave_0, values = (var_6721_cast_fp16, var_6723_cast_fp16, var_6725_cast_fp16, var_6727_cast_fp16))[name = tensor<string, []>("op_6787_cast_fp16")];
+            tensor<bool, []> var_6789_interleave_0 = const()[name = tensor<string, []>("op_6789_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6789_cast_fp16 = concat(axis = var_5888, interleave = var_6789_interleave_0, values = (var_6729_cast_fp16, var_6731_cast_fp16, var_6733_cast_fp16, var_6735_cast_fp16))[name = tensor<string, []>("op_6789_cast_fp16")];
+            tensor<bool, []> var_6791_interleave_0 = const()[name = tensor<string, []>("op_6791_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6791_cast_fp16 = concat(axis = var_5888, interleave = var_6791_interleave_0, values = (var_6737_cast_fp16, var_6739_cast_fp16, var_6741_cast_fp16, var_6743_cast_fp16))[name = tensor<string, []>("op_6791_cast_fp16")];
+            tensor<bool, []> var_6793_interleave_0 = const()[name = tensor<string, []>("op_6793_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6793_cast_fp16 = concat(axis = var_5888, interleave = var_6793_interleave_0, values = (var_6745_cast_fp16, var_6747_cast_fp16, var_6749_cast_fp16, var_6751_cast_fp16))[name = tensor<string, []>("op_6793_cast_fp16")];
+            tensor<bool, []> var_6795_interleave_0 = const()[name = tensor<string, []>("op_6795_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6795_cast_fp16 = concat(axis = var_5888, interleave = var_6795_interleave_0, values = (var_6753_cast_fp16, var_6755_cast_fp16, var_6757_cast_fp16, var_6759_cast_fp16))[name = tensor<string, []>("op_6795_cast_fp16")];
+            tensor<bool, []> var_6797_interleave_0 = const()[name = tensor<string, []>("op_6797_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6797_cast_fp16 = concat(axis = var_5888, interleave = var_6797_interleave_0, values = (var_6761_cast_fp16, var_6763_cast_fp16, var_6765_cast_fp16, var_6767_cast_fp16))[name = tensor<string, []>("op_6797_cast_fp16")];
+            tensor<bool, []> var_6799_interleave_0 = const()[name = tensor<string, []>("op_6799_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6799_cast_fp16 = concat(axis = var_5888, interleave = var_6799_interleave_0, values = (var_6769_cast_fp16, var_6771_cast_fp16, var_6773_cast_fp16, var_6775_cast_fp16))[name = tensor<string, []>("op_6799_cast_fp16")];
+            tensor<bool, []> input_49_interleave_0 = const()[name = tensor<string, []>("input_49_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_49_cast_fp16 = concat(axis = var_5905, interleave = input_49_interleave_0, values = (var_6777_cast_fp16, var_6779_cast_fp16, var_6781_cast_fp16, var_6783_cast_fp16, var_6785_cast_fp16, var_6787_cast_fp16, var_6789_cast_fp16, var_6791_cast_fp16, var_6793_cast_fp16, var_6795_cast_fp16, var_6797_cast_fp16, var_6799_cast_fp16))[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<int32, [2]> var_6804 = const()[name = tensor<string, []>("op_6804"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_6806 = const()[name = tensor<string, []>("op_6806"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_27_pad_type_0 = const()[name = tensor<string, []>("obj_27_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_27_pad_0 = const()[name = tensor<string, []>("obj_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94814784)))];
+            tensor<fp16, [768]> layers_6_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95994496)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_27_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_bias_to_fp16, dilations = var_6806, groups = var_5905, pad = obj_27_pad_0, pad_type = obj_27_pad_type_0, strides = var_6804, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_49_cast_fp16)[name = tensor<string, []>("obj_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> var_6812 = const()[name = tensor<string, []>("op_6812"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_27_cast_fp16 = reduce_mean(axes = var_6812, keep_dims = var_5906, x = inputs_27_cast_fp16)[name = tensor<string, []>("channels_mean_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_27_cast_fp16 = sub(x = inputs_27_cast_fp16, y = channels_mean_27_cast_fp16)[name = tensor<string, []>("zero_mean_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = zero_mean_27_cast_fp16)[name = tensor<string, []>("zero_mean_sq_27_cast_fp16")];
+            tensor<int32, [1]> var_6816 = const()[name = tensor<string, []>("op_6816"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_6817_cast_fp16 = reduce_mean(axes = var_6816, keep_dims = var_5906, x = zero_mean_sq_27_cast_fp16)[name = tensor<string, []>("op_6817_cast_fp16")];
+            tensor<fp16, []> var_6818_to_fp16 = const()[name = tensor<string, []>("op_6818_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_6819_cast_fp16 = add(x = var_6817_cast_fp16, y = var_6818_to_fp16)[name = tensor<string, []>("op_6819_cast_fp16")];
+            tensor<fp16, []> denom_27_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_27_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0_to_fp16, x = var_6819_cast_fp16)[name = tensor<string, []>("denom_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = denom_27_cast_fp16)[name = tensor<string, []>("out_27_cast_fp16")];
+            tensor<fp16, [768]> input_51_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_51_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95996096)))];
+            tensor<fp16, [768]> input_51_beta_0_to_fp16 = const()[name = tensor<string, []>("input_51_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95997696)))];
+            tensor<fp16, []> input_51_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_51_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<int32, [2]> var_6830 = const()[name = tensor<string, []>("op_6830"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_6832 = const()[name = tensor<string, []>("op_6832"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_53_pad_type_0 = const()[name = tensor<string, []>("input_53_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_53_pad_0 = const()[name = tensor<string, []>("input_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95999296)))];
+            tensor<fp16, [3072]> layers_6_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100717952)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_53_cast_fp16 = conv(bias = layers_6_fc1_bias_to_fp16, dilations = var_6832, groups = var_5905, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = var_6830, weight = layers_6_fc1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> input_55_mode_0 = const()[name = tensor<string, []>("input_55_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<int32, [2]> var_6838 = const()[name = tensor<string, []>("op_6838"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_6840 = const()[name = tensor<string, []>("op_6840"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_17_pad_type_0 = const()[name = tensor<string, []>("hidden_states_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_17_pad_0 = const()[name = tensor<string, []>("hidden_states_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100724160)))];
+            tensor<fp16, [768]> layers_6_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105442816)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_17_cast_fp16 = conv(bias = layers_6_fc2_bias_to_fp16, dilations = var_6840, groups = var_5905, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = var_6838, weight = layers_6_fc2_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, []> var_6847 = const()[name = tensor<string, []>("op_6847"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_6864 = const()[name = tensor<string, []>("op_6864"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_6865 = const()[name = tensor<string, []>("op_6865"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_6875 = const()[name = tensor<string, []>("op_6875"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_29_cast_fp16 = reduce_mean(axes = var_6875, keep_dims = var_6865, x = inputs_29_cast_fp16)[name = tensor<string, []>("channels_mean_29_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_29_cast_fp16 = sub(x = inputs_29_cast_fp16, y = channels_mean_29_cast_fp16)[name = tensor<string, []>("zero_mean_29_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = zero_mean_29_cast_fp16)[name = tensor<string, []>("zero_mean_sq_29_cast_fp16")];
+            tensor<int32, [1]> var_6879 = const()[name = tensor<string, []>("op_6879"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_6880_cast_fp16 = reduce_mean(axes = var_6879, keep_dims = var_6865, x = zero_mean_sq_29_cast_fp16)[name = tensor<string, []>("op_6880_cast_fp16")];
+            tensor<fp16, []> var_6881_to_fp16 = const()[name = tensor<string, []>("op_6881_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_6882_cast_fp16 = add(x = var_6880_cast_fp16, y = var_6881_to_fp16)[name = tensor<string, []>("op_6882_cast_fp16")];
+            tensor<fp16, []> denom_29_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_29_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0_to_fp16, x = var_6882_cast_fp16)[name = tensor<string, []>("denom_29_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = denom_29_cast_fp16)[name = tensor<string, []>("out_29_cast_fp16")];
+            tensor<fp16, [768]> obj_29_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_29_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105444416)))];
+            tensor<fp16, [768]> obj_29_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_29_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105446016)))];
+            tensor<fp16, []> obj_29_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_29_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor<string, []>("obj_29_cast_fp16")];
+            tensor<int32, [2]> var_6897 = const()[name = tensor<string, []>("op_6897"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_6899 = const()[name = tensor<string, []>("op_6899"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_15_pad_type_0 = const()[name = tensor<string, []>("query_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_15_pad_0 = const()[name = tensor<string, []>("query_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105447616)))];
+            tensor<fp16, [768]> layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106627328)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_15_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_bias_to_fp16, dilations = var_6899, groups = var_6864, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = var_6897, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("query_15_cast_fp16")];
+            tensor<int32, [2]> var_6903 = const()[name = tensor<string, []>("op_6903"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_6905 = const()[name = tensor<string, []>("op_6905"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_15_pad_type_0 = const()[name = tensor<string, []>("key_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_15_pad_0 = const()[name = tensor<string, []>("key_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106628928)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_15_cast_fp16 = conv(dilations = var_6905, groups = var_6864, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = var_6903, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("key_15_cast_fp16")];
+            tensor<int32, [2]> var_6910 = const()[name = tensor<string, []>("op_6910"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_6912 = const()[name = tensor<string, []>("op_6912"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_15_pad_type_0 = const()[name = tensor<string, []>("value_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_15_pad_0 = const()[name = tensor<string, []>("value_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107808640)))];
+            tensor<fp16, [768]> layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108988352)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_15_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_bias_to_fp16, dilations = var_6912, groups = var_6864, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = var_6910, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("value_15_cast_fp16")];
+            tensor<int32, [4]> var_6919_begin_0 = const()[name = tensor<string, []>("op_6919_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6919_end_0 = const()[name = tensor<string, []>("op_6919_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6919_end_mask_0 = const()[name = tensor<string, []>("op_6919_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6919_cast_fp16 = slice_by_index(begin = var_6919_begin_0, end = var_6919_end_0, end_mask = var_6919_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6919_cast_fp16")];
+            tensor<int32, [4]> var_6923_begin_0 = const()[name = tensor<string, []>("op_6923_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_6923_end_0 = const()[name = tensor<string, []>("op_6923_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_6923_end_mask_0 = const()[name = tensor<string, []>("op_6923_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6923_cast_fp16 = slice_by_index(begin = var_6923_begin_0, end = var_6923_end_0, end_mask = var_6923_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6923_cast_fp16")];
+            tensor<int32, [4]> var_6927_begin_0 = const()[name = tensor<string, []>("op_6927_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_6927_end_0 = const()[name = tensor<string, []>("op_6927_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_6927_end_mask_0 = const()[name = tensor<string, []>("op_6927_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6927_cast_fp16 = slice_by_index(begin = var_6927_begin_0, end = var_6927_end_0, end_mask = var_6927_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6927_cast_fp16")];
+            tensor<int32, [4]> var_6931_begin_0 = const()[name = tensor<string, []>("op_6931_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_6931_end_0 = const()[name = tensor<string, []>("op_6931_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_6931_end_mask_0 = const()[name = tensor<string, []>("op_6931_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6931_cast_fp16 = slice_by_index(begin = var_6931_begin_0, end = var_6931_end_0, end_mask = var_6931_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6931_cast_fp16")];
+            tensor<int32, [4]> var_6935_begin_0 = const()[name = tensor<string, []>("op_6935_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_6935_end_0 = const()[name = tensor<string, []>("op_6935_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_6935_end_mask_0 = const()[name = tensor<string, []>("op_6935_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6935_cast_fp16 = slice_by_index(begin = var_6935_begin_0, end = var_6935_end_0, end_mask = var_6935_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6935_cast_fp16")];
+            tensor<int32, [4]> var_6939_begin_0 = const()[name = tensor<string, []>("op_6939_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_6939_end_0 = const()[name = tensor<string, []>("op_6939_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_6939_end_mask_0 = const()[name = tensor<string, []>("op_6939_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6939_cast_fp16 = slice_by_index(begin = var_6939_begin_0, end = var_6939_end_0, end_mask = var_6939_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6939_cast_fp16")];
+            tensor<int32, [4]> var_6943_begin_0 = const()[name = tensor<string, []>("op_6943_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_6943_end_0 = const()[name = tensor<string, []>("op_6943_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_6943_end_mask_0 = const()[name = tensor<string, []>("op_6943_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6943_cast_fp16 = slice_by_index(begin = var_6943_begin_0, end = var_6943_end_0, end_mask = var_6943_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6943_cast_fp16")];
+            tensor<int32, [4]> var_6947_begin_0 = const()[name = tensor<string, []>("op_6947_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_6947_end_0 = const()[name = tensor<string, []>("op_6947_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_6947_end_mask_0 = const()[name = tensor<string, []>("op_6947_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6947_cast_fp16 = slice_by_index(begin = var_6947_begin_0, end = var_6947_end_0, end_mask = var_6947_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6947_cast_fp16")];
+            tensor<int32, [4]> var_6951_begin_0 = const()[name = tensor<string, []>("op_6951_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_6951_end_0 = const()[name = tensor<string, []>("op_6951_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_6951_end_mask_0 = const()[name = tensor<string, []>("op_6951_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6951_cast_fp16 = slice_by_index(begin = var_6951_begin_0, end = var_6951_end_0, end_mask = var_6951_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6951_cast_fp16")];
+            tensor<int32, [4]> var_6955_begin_0 = const()[name = tensor<string, []>("op_6955_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_6955_end_0 = const()[name = tensor<string, []>("op_6955_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_6955_end_mask_0 = const()[name = tensor<string, []>("op_6955_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6955_cast_fp16 = slice_by_index(begin = var_6955_begin_0, end = var_6955_end_0, end_mask = var_6955_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6955_cast_fp16")];
+            tensor<int32, [4]> var_6959_begin_0 = const()[name = tensor<string, []>("op_6959_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_6959_end_0 = const()[name = tensor<string, []>("op_6959_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_6959_end_mask_0 = const()[name = tensor<string, []>("op_6959_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6959_cast_fp16 = slice_by_index(begin = var_6959_begin_0, end = var_6959_end_0, end_mask = var_6959_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6959_cast_fp16")];
+            tensor<int32, [4]> var_6963_begin_0 = const()[name = tensor<string, []>("op_6963_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_6963_end_0 = const()[name = tensor<string, []>("op_6963_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_6963_end_mask_0 = const()[name = tensor<string, []>("op_6963_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_6963_cast_fp16 = slice_by_index(begin = var_6963_begin_0, end = var_6963_end_0, end_mask = var_6963_end_mask_0, x = query_15_cast_fp16)[name = tensor<string, []>("op_6963_cast_fp16")];
+            tensor<int32, [4]> var_6972_begin_0 = const()[name = tensor<string, []>("op_6972_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_6972_end_0 = const()[name = tensor<string, []>("op_6972_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_6972_end_mask_0 = const()[name = tensor<string, []>("op_6972_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6972_cast_fp16 = slice_by_index(begin = var_6972_begin_0, end = var_6972_end_0, end_mask = var_6972_end_mask_0, x = var_6919_cast_fp16)[name = tensor<string, []>("op_6972_cast_fp16")];
+            tensor<int32, [4]> var_6979_begin_0 = const()[name = tensor<string, []>("op_6979_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_6979_end_0 = const()[name = tensor<string, []>("op_6979_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_6979_end_mask_0 = const()[name = tensor<string, []>("op_6979_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6979_cast_fp16 = slice_by_index(begin = var_6979_begin_0, end = var_6979_end_0, end_mask = var_6979_end_mask_0, x = var_6919_cast_fp16)[name = tensor<string, []>("op_6979_cast_fp16")];
+            tensor<int32, [4]> var_6986_begin_0 = const()[name = tensor<string, []>("op_6986_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_6986_end_0 = const()[name = tensor<string, []>("op_6986_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_6986_end_mask_0 = const()[name = tensor<string, []>("op_6986_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6986_cast_fp16 = slice_by_index(begin = var_6986_begin_0, end = var_6986_end_0, end_mask = var_6986_end_mask_0, x = var_6919_cast_fp16)[name = tensor<string, []>("op_6986_cast_fp16")];
+            tensor<int32, [4]> var_6993_begin_0 = const()[name = tensor<string, []>("op_6993_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_6993_end_0 = const()[name = tensor<string, []>("op_6993_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_6993_end_mask_0 = const()[name = tensor<string, []>("op_6993_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_6993_cast_fp16 = slice_by_index(begin = var_6993_begin_0, end = var_6993_end_0, end_mask = var_6993_end_mask_0, x = var_6919_cast_fp16)[name = tensor<string, []>("op_6993_cast_fp16")];
+            tensor<int32, [4]> var_7000_begin_0 = const()[name = tensor<string, []>("op_7000_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7000_end_0 = const()[name = tensor<string, []>("op_7000_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7000_end_mask_0 = const()[name = tensor<string, []>("op_7000_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7000_cast_fp16 = slice_by_index(begin = var_7000_begin_0, end = var_7000_end_0, end_mask = var_7000_end_mask_0, x = var_6923_cast_fp16)[name = tensor<string, []>("op_7000_cast_fp16")];
+            tensor<int32, [4]> var_7007_begin_0 = const()[name = tensor<string, []>("op_7007_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7007_end_0 = const()[name = tensor<string, []>("op_7007_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7007_end_mask_0 = const()[name = tensor<string, []>("op_7007_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7007_cast_fp16 = slice_by_index(begin = var_7007_begin_0, end = var_7007_end_0, end_mask = var_7007_end_mask_0, x = var_6923_cast_fp16)[name = tensor<string, []>("op_7007_cast_fp16")];
+            tensor<int32, [4]> var_7014_begin_0 = const()[name = tensor<string, []>("op_7014_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7014_end_0 = const()[name = tensor<string, []>("op_7014_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7014_end_mask_0 = const()[name = tensor<string, []>("op_7014_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7014_cast_fp16 = slice_by_index(begin = var_7014_begin_0, end = var_7014_end_0, end_mask = var_7014_end_mask_0, x = var_6923_cast_fp16)[name = tensor<string, []>("op_7014_cast_fp16")];
+            tensor<int32, [4]> var_7021_begin_0 = const()[name = tensor<string, []>("op_7021_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7021_end_0 = const()[name = tensor<string, []>("op_7021_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7021_end_mask_0 = const()[name = tensor<string, []>("op_7021_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7021_cast_fp16 = slice_by_index(begin = var_7021_begin_0, end = var_7021_end_0, end_mask = var_7021_end_mask_0, x = var_6923_cast_fp16)[name = tensor<string, []>("op_7021_cast_fp16")];
+            tensor<int32, [4]> var_7028_begin_0 = const()[name = tensor<string, []>("op_7028_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7028_end_0 = const()[name = tensor<string, []>("op_7028_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7028_end_mask_0 = const()[name = tensor<string, []>("op_7028_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7028_cast_fp16 = slice_by_index(begin = var_7028_begin_0, end = var_7028_end_0, end_mask = var_7028_end_mask_0, x = var_6927_cast_fp16)[name = tensor<string, []>("op_7028_cast_fp16")];
+            tensor<int32, [4]> var_7035_begin_0 = const()[name = tensor<string, []>("op_7035_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7035_end_0 = const()[name = tensor<string, []>("op_7035_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7035_end_mask_0 = const()[name = tensor<string, []>("op_7035_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7035_cast_fp16 = slice_by_index(begin = var_7035_begin_0, end = var_7035_end_0, end_mask = var_7035_end_mask_0, x = var_6927_cast_fp16)[name = tensor<string, []>("op_7035_cast_fp16")];
+            tensor<int32, [4]> var_7042_begin_0 = const()[name = tensor<string, []>("op_7042_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7042_end_0 = const()[name = tensor<string, []>("op_7042_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7042_end_mask_0 = const()[name = tensor<string, []>("op_7042_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7042_cast_fp16 = slice_by_index(begin = var_7042_begin_0, end = var_7042_end_0, end_mask = var_7042_end_mask_0, x = var_6927_cast_fp16)[name = tensor<string, []>("op_7042_cast_fp16")];
+            tensor<int32, [4]> var_7049_begin_0 = const()[name = tensor<string, []>("op_7049_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7049_end_0 = const()[name = tensor<string, []>("op_7049_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7049_end_mask_0 = const()[name = tensor<string, []>("op_7049_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7049_cast_fp16 = slice_by_index(begin = var_7049_begin_0, end = var_7049_end_0, end_mask = var_7049_end_mask_0, x = var_6927_cast_fp16)[name = tensor<string, []>("op_7049_cast_fp16")];
+            tensor<int32, [4]> var_7056_begin_0 = const()[name = tensor<string, []>("op_7056_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7056_end_0 = const()[name = tensor<string, []>("op_7056_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7056_end_mask_0 = const()[name = tensor<string, []>("op_7056_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7056_cast_fp16 = slice_by_index(begin = var_7056_begin_0, end = var_7056_end_0, end_mask = var_7056_end_mask_0, x = var_6931_cast_fp16)[name = tensor<string, []>("op_7056_cast_fp16")];
+            tensor<int32, [4]> var_7063_begin_0 = const()[name = tensor<string, []>("op_7063_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7063_end_0 = const()[name = tensor<string, []>("op_7063_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7063_end_mask_0 = const()[name = tensor<string, []>("op_7063_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7063_cast_fp16 = slice_by_index(begin = var_7063_begin_0, end = var_7063_end_0, end_mask = var_7063_end_mask_0, x = var_6931_cast_fp16)[name = tensor<string, []>("op_7063_cast_fp16")];
+            tensor<int32, [4]> var_7070_begin_0 = const()[name = tensor<string, []>("op_7070_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7070_end_0 = const()[name = tensor<string, []>("op_7070_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7070_end_mask_0 = const()[name = tensor<string, []>("op_7070_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7070_cast_fp16 = slice_by_index(begin = var_7070_begin_0, end = var_7070_end_0, end_mask = var_7070_end_mask_0, x = var_6931_cast_fp16)[name = tensor<string, []>("op_7070_cast_fp16")];
+            tensor<int32, [4]> var_7077_begin_0 = const()[name = tensor<string, []>("op_7077_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7077_end_0 = const()[name = tensor<string, []>("op_7077_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7077_end_mask_0 = const()[name = tensor<string, []>("op_7077_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7077_cast_fp16 = slice_by_index(begin = var_7077_begin_0, end = var_7077_end_0, end_mask = var_7077_end_mask_0, x = var_6931_cast_fp16)[name = tensor<string, []>("op_7077_cast_fp16")];
+            tensor<int32, [4]> var_7084_begin_0 = const()[name = tensor<string, []>("op_7084_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7084_end_0 = const()[name = tensor<string, []>("op_7084_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7084_end_mask_0 = const()[name = tensor<string, []>("op_7084_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7084_cast_fp16 = slice_by_index(begin = var_7084_begin_0, end = var_7084_end_0, end_mask = var_7084_end_mask_0, x = var_6935_cast_fp16)[name = tensor<string, []>("op_7084_cast_fp16")];
+            tensor<int32, [4]> var_7091_begin_0 = const()[name = tensor<string, []>("op_7091_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7091_end_0 = const()[name = tensor<string, []>("op_7091_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7091_end_mask_0 = const()[name = tensor<string, []>("op_7091_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7091_cast_fp16 = slice_by_index(begin = var_7091_begin_0, end = var_7091_end_0, end_mask = var_7091_end_mask_0, x = var_6935_cast_fp16)[name = tensor<string, []>("op_7091_cast_fp16")];
+            tensor<int32, [4]> var_7098_begin_0 = const()[name = tensor<string, []>("op_7098_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7098_end_0 = const()[name = tensor<string, []>("op_7098_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7098_end_mask_0 = const()[name = tensor<string, []>("op_7098_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7098_cast_fp16 = slice_by_index(begin = var_7098_begin_0, end = var_7098_end_0, end_mask = var_7098_end_mask_0, x = var_6935_cast_fp16)[name = tensor<string, []>("op_7098_cast_fp16")];
+            tensor<int32, [4]> var_7105_begin_0 = const()[name = tensor<string, []>("op_7105_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7105_end_0 = const()[name = tensor<string, []>("op_7105_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7105_end_mask_0 = const()[name = tensor<string, []>("op_7105_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7105_cast_fp16 = slice_by_index(begin = var_7105_begin_0, end = var_7105_end_0, end_mask = var_7105_end_mask_0, x = var_6935_cast_fp16)[name = tensor<string, []>("op_7105_cast_fp16")];
+            tensor<int32, [4]> var_7112_begin_0 = const()[name = tensor<string, []>("op_7112_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7112_end_0 = const()[name = tensor<string, []>("op_7112_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7112_end_mask_0 = const()[name = tensor<string, []>("op_7112_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7112_cast_fp16 = slice_by_index(begin = var_7112_begin_0, end = var_7112_end_0, end_mask = var_7112_end_mask_0, x = var_6939_cast_fp16)[name = tensor<string, []>("op_7112_cast_fp16")];
+            tensor<int32, [4]> var_7119_begin_0 = const()[name = tensor<string, []>("op_7119_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7119_end_0 = const()[name = tensor<string, []>("op_7119_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7119_end_mask_0 = const()[name = tensor<string, []>("op_7119_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7119_cast_fp16 = slice_by_index(begin = var_7119_begin_0, end = var_7119_end_0, end_mask = var_7119_end_mask_0, x = var_6939_cast_fp16)[name = tensor<string, []>("op_7119_cast_fp16")];
+            tensor<int32, [4]> var_7126_begin_0 = const()[name = tensor<string, []>("op_7126_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7126_end_0 = const()[name = tensor<string, []>("op_7126_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7126_end_mask_0 = const()[name = tensor<string, []>("op_7126_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7126_cast_fp16 = slice_by_index(begin = var_7126_begin_0, end = var_7126_end_0, end_mask = var_7126_end_mask_0, x = var_6939_cast_fp16)[name = tensor<string, []>("op_7126_cast_fp16")];
+            tensor<int32, [4]> var_7133_begin_0 = const()[name = tensor<string, []>("op_7133_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7133_end_0 = const()[name = tensor<string, []>("op_7133_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7133_end_mask_0 = const()[name = tensor<string, []>("op_7133_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7133_cast_fp16 = slice_by_index(begin = var_7133_begin_0, end = var_7133_end_0, end_mask = var_7133_end_mask_0, x = var_6939_cast_fp16)[name = tensor<string, []>("op_7133_cast_fp16")];
+            tensor<int32, [4]> var_7140_begin_0 = const()[name = tensor<string, []>("op_7140_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7140_end_0 = const()[name = tensor<string, []>("op_7140_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7140_end_mask_0 = const()[name = tensor<string, []>("op_7140_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7140_cast_fp16 = slice_by_index(begin = var_7140_begin_0, end = var_7140_end_0, end_mask = var_7140_end_mask_0, x = var_6943_cast_fp16)[name = tensor<string, []>("op_7140_cast_fp16")];
+            tensor<int32, [4]> var_7147_begin_0 = const()[name = tensor<string, []>("op_7147_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7147_end_0 = const()[name = tensor<string, []>("op_7147_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7147_end_mask_0 = const()[name = tensor<string, []>("op_7147_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7147_cast_fp16 = slice_by_index(begin = var_7147_begin_0, end = var_7147_end_0, end_mask = var_7147_end_mask_0, x = var_6943_cast_fp16)[name = tensor<string, []>("op_7147_cast_fp16")];
+            tensor<int32, [4]> var_7154_begin_0 = const()[name = tensor<string, []>("op_7154_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7154_end_0 = const()[name = tensor<string, []>("op_7154_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7154_end_mask_0 = const()[name = tensor<string, []>("op_7154_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7154_cast_fp16 = slice_by_index(begin = var_7154_begin_0, end = var_7154_end_0, end_mask = var_7154_end_mask_0, x = var_6943_cast_fp16)[name = tensor<string, []>("op_7154_cast_fp16")];
+            tensor<int32, [4]> var_7161_begin_0 = const()[name = tensor<string, []>("op_7161_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7161_end_0 = const()[name = tensor<string, []>("op_7161_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7161_end_mask_0 = const()[name = tensor<string, []>("op_7161_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7161_cast_fp16 = slice_by_index(begin = var_7161_begin_0, end = var_7161_end_0, end_mask = var_7161_end_mask_0, x = var_6943_cast_fp16)[name = tensor<string, []>("op_7161_cast_fp16")];
+            tensor<int32, [4]> var_7168_begin_0 = const()[name = tensor<string, []>("op_7168_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7168_end_0 = const()[name = tensor<string, []>("op_7168_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7168_end_mask_0 = const()[name = tensor<string, []>("op_7168_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7168_cast_fp16 = slice_by_index(begin = var_7168_begin_0, end = var_7168_end_0, end_mask = var_7168_end_mask_0, x = var_6947_cast_fp16)[name = tensor<string, []>("op_7168_cast_fp16")];
+            tensor<int32, [4]> var_7175_begin_0 = const()[name = tensor<string, []>("op_7175_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7175_end_0 = const()[name = tensor<string, []>("op_7175_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7175_end_mask_0 = const()[name = tensor<string, []>("op_7175_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7175_cast_fp16 = slice_by_index(begin = var_7175_begin_0, end = var_7175_end_0, end_mask = var_7175_end_mask_0, x = var_6947_cast_fp16)[name = tensor<string, []>("op_7175_cast_fp16")];
+            tensor<int32, [4]> var_7182_begin_0 = const()[name = tensor<string, []>("op_7182_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7182_end_0 = const()[name = tensor<string, []>("op_7182_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7182_end_mask_0 = const()[name = tensor<string, []>("op_7182_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7182_cast_fp16 = slice_by_index(begin = var_7182_begin_0, end = var_7182_end_0, end_mask = var_7182_end_mask_0, x = var_6947_cast_fp16)[name = tensor<string, []>("op_7182_cast_fp16")];
+            tensor<int32, [4]> var_7189_begin_0 = const()[name = tensor<string, []>("op_7189_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7189_end_0 = const()[name = tensor<string, []>("op_7189_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7189_end_mask_0 = const()[name = tensor<string, []>("op_7189_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7189_cast_fp16 = slice_by_index(begin = var_7189_begin_0, end = var_7189_end_0, end_mask = var_7189_end_mask_0, x = var_6947_cast_fp16)[name = tensor<string, []>("op_7189_cast_fp16")];
+            tensor<int32, [4]> var_7196_begin_0 = const()[name = tensor<string, []>("op_7196_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7196_end_0 = const()[name = tensor<string, []>("op_7196_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7196_end_mask_0 = const()[name = tensor<string, []>("op_7196_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7196_cast_fp16 = slice_by_index(begin = var_7196_begin_0, end = var_7196_end_0, end_mask = var_7196_end_mask_0, x = var_6951_cast_fp16)[name = tensor<string, []>("op_7196_cast_fp16")];
+            tensor<int32, [4]> var_7203_begin_0 = const()[name = tensor<string, []>("op_7203_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7203_end_0 = const()[name = tensor<string, []>("op_7203_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7203_end_mask_0 = const()[name = tensor<string, []>("op_7203_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7203_cast_fp16 = slice_by_index(begin = var_7203_begin_0, end = var_7203_end_0, end_mask = var_7203_end_mask_0, x = var_6951_cast_fp16)[name = tensor<string, []>("op_7203_cast_fp16")];
+            tensor<int32, [4]> var_7210_begin_0 = const()[name = tensor<string, []>("op_7210_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7210_end_0 = const()[name = tensor<string, []>("op_7210_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7210_end_mask_0 = const()[name = tensor<string, []>("op_7210_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7210_cast_fp16 = slice_by_index(begin = var_7210_begin_0, end = var_7210_end_0, end_mask = var_7210_end_mask_0, x = var_6951_cast_fp16)[name = tensor<string, []>("op_7210_cast_fp16")];
+            tensor<int32, [4]> var_7217_begin_0 = const()[name = tensor<string, []>("op_7217_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7217_end_0 = const()[name = tensor<string, []>("op_7217_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7217_end_mask_0 = const()[name = tensor<string, []>("op_7217_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7217_cast_fp16 = slice_by_index(begin = var_7217_begin_0, end = var_7217_end_0, end_mask = var_7217_end_mask_0, x = var_6951_cast_fp16)[name = tensor<string, []>("op_7217_cast_fp16")];
+            tensor<int32, [4]> var_7224_begin_0 = const()[name = tensor<string, []>("op_7224_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7224_end_0 = const()[name = tensor<string, []>("op_7224_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7224_end_mask_0 = const()[name = tensor<string, []>("op_7224_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7224_cast_fp16 = slice_by_index(begin = var_7224_begin_0, end = var_7224_end_0, end_mask = var_7224_end_mask_0, x = var_6955_cast_fp16)[name = tensor<string, []>("op_7224_cast_fp16")];
+            tensor<int32, [4]> var_7231_begin_0 = const()[name = tensor<string, []>("op_7231_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7231_end_0 = const()[name = tensor<string, []>("op_7231_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7231_end_mask_0 = const()[name = tensor<string, []>("op_7231_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7231_cast_fp16 = slice_by_index(begin = var_7231_begin_0, end = var_7231_end_0, end_mask = var_7231_end_mask_0, x = var_6955_cast_fp16)[name = tensor<string, []>("op_7231_cast_fp16")];
+            tensor<int32, [4]> var_7238_begin_0 = const()[name = tensor<string, []>("op_7238_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7238_end_0 = const()[name = tensor<string, []>("op_7238_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7238_end_mask_0 = const()[name = tensor<string, []>("op_7238_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7238_cast_fp16 = slice_by_index(begin = var_7238_begin_0, end = var_7238_end_0, end_mask = var_7238_end_mask_0, x = var_6955_cast_fp16)[name = tensor<string, []>("op_7238_cast_fp16")];
+            tensor<int32, [4]> var_7245_begin_0 = const()[name = tensor<string, []>("op_7245_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7245_end_0 = const()[name = tensor<string, []>("op_7245_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7245_end_mask_0 = const()[name = tensor<string, []>("op_7245_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7245_cast_fp16 = slice_by_index(begin = var_7245_begin_0, end = var_7245_end_0, end_mask = var_7245_end_mask_0, x = var_6955_cast_fp16)[name = tensor<string, []>("op_7245_cast_fp16")];
+            tensor<int32, [4]> var_7252_begin_0 = const()[name = tensor<string, []>("op_7252_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7252_end_0 = const()[name = tensor<string, []>("op_7252_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7252_end_mask_0 = const()[name = tensor<string, []>("op_7252_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7252_cast_fp16 = slice_by_index(begin = var_7252_begin_0, end = var_7252_end_0, end_mask = var_7252_end_mask_0, x = var_6959_cast_fp16)[name = tensor<string, []>("op_7252_cast_fp16")];
+            tensor<int32, [4]> var_7259_begin_0 = const()[name = tensor<string, []>("op_7259_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7259_end_0 = const()[name = tensor<string, []>("op_7259_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7259_end_mask_0 = const()[name = tensor<string, []>("op_7259_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7259_cast_fp16 = slice_by_index(begin = var_7259_begin_0, end = var_7259_end_0, end_mask = var_7259_end_mask_0, x = var_6959_cast_fp16)[name = tensor<string, []>("op_7259_cast_fp16")];
+            tensor<int32, [4]> var_7266_begin_0 = const()[name = tensor<string, []>("op_7266_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7266_end_0 = const()[name = tensor<string, []>("op_7266_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7266_end_mask_0 = const()[name = tensor<string, []>("op_7266_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7266_cast_fp16 = slice_by_index(begin = var_7266_begin_0, end = var_7266_end_0, end_mask = var_7266_end_mask_0, x = var_6959_cast_fp16)[name = tensor<string, []>("op_7266_cast_fp16")];
+            tensor<int32, [4]> var_7273_begin_0 = const()[name = tensor<string, []>("op_7273_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7273_end_0 = const()[name = tensor<string, []>("op_7273_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7273_end_mask_0 = const()[name = tensor<string, []>("op_7273_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7273_cast_fp16 = slice_by_index(begin = var_7273_begin_0, end = var_7273_end_0, end_mask = var_7273_end_mask_0, x = var_6959_cast_fp16)[name = tensor<string, []>("op_7273_cast_fp16")];
+            tensor<int32, [4]> var_7280_begin_0 = const()[name = tensor<string, []>("op_7280_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7280_end_0 = const()[name = tensor<string, []>("op_7280_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7280_end_mask_0 = const()[name = tensor<string, []>("op_7280_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7280_cast_fp16 = slice_by_index(begin = var_7280_begin_0, end = var_7280_end_0, end_mask = var_7280_end_mask_0, x = var_6963_cast_fp16)[name = tensor<string, []>("op_7280_cast_fp16")];
+            tensor<int32, [4]> var_7287_begin_0 = const()[name = tensor<string, []>("op_7287_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7287_end_0 = const()[name = tensor<string, []>("op_7287_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7287_end_mask_0 = const()[name = tensor<string, []>("op_7287_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7287_cast_fp16 = slice_by_index(begin = var_7287_begin_0, end = var_7287_end_0, end_mask = var_7287_end_mask_0, x = var_6963_cast_fp16)[name = tensor<string, []>("op_7287_cast_fp16")];
+            tensor<int32, [4]> var_7294_begin_0 = const()[name = tensor<string, []>("op_7294_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7294_end_0 = const()[name = tensor<string, []>("op_7294_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7294_end_mask_0 = const()[name = tensor<string, []>("op_7294_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7294_cast_fp16 = slice_by_index(begin = var_7294_begin_0, end = var_7294_end_0, end_mask = var_7294_end_mask_0, x = var_6963_cast_fp16)[name = tensor<string, []>("op_7294_cast_fp16")];
+            tensor<int32, [4]> var_7301_begin_0 = const()[name = tensor<string, []>("op_7301_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7301_end_0 = const()[name = tensor<string, []>("op_7301_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7301_end_mask_0 = const()[name = tensor<string, []>("op_7301_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7301_cast_fp16 = slice_by_index(begin = var_7301_begin_0, end = var_7301_end_0, end_mask = var_7301_end_mask_0, x = var_6963_cast_fp16)[name = tensor<string, []>("op_7301_cast_fp16")];
+            tensor<int32, [4]> k_15_perm_0 = const()[name = tensor<string, []>("k_15_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_7306_begin_0 = const()[name = tensor<string, []>("op_7306_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7306_end_0 = const()[name = tensor<string, []>("op_7306_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_7306_end_mask_0 = const()[name = tensor<string, []>("op_7306_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_4 = transpose(perm = k_15_perm_0, x = key_15_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7306_cast_fp16 = slice_by_index(begin = var_7306_begin_0, end = var_7306_end_0, end_mask = var_7306_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7306_cast_fp16")];
+            tensor<int32, [4]> var_7310_begin_0 = const()[name = tensor<string, []>("op_7310_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_7310_end_0 = const()[name = tensor<string, []>("op_7310_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_7310_end_mask_0 = const()[name = tensor<string, []>("op_7310_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7310_cast_fp16 = slice_by_index(begin = var_7310_begin_0, end = var_7310_end_0, end_mask = var_7310_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7310_cast_fp16")];
+            tensor<int32, [4]> var_7314_begin_0 = const()[name = tensor<string, []>("op_7314_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_7314_end_0 = const()[name = tensor<string, []>("op_7314_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_7314_end_mask_0 = const()[name = tensor<string, []>("op_7314_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7314_cast_fp16 = slice_by_index(begin = var_7314_begin_0, end = var_7314_end_0, end_mask = var_7314_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7314_cast_fp16")];
+            tensor<int32, [4]> var_7318_begin_0 = const()[name = tensor<string, []>("op_7318_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_7318_end_0 = const()[name = tensor<string, []>("op_7318_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_7318_end_mask_0 = const()[name = tensor<string, []>("op_7318_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7318_cast_fp16 = slice_by_index(begin = var_7318_begin_0, end = var_7318_end_0, end_mask = var_7318_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7318_cast_fp16")];
+            tensor<int32, [4]> var_7322_begin_0 = const()[name = tensor<string, []>("op_7322_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_7322_end_0 = const()[name = tensor<string, []>("op_7322_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_7322_end_mask_0 = const()[name = tensor<string, []>("op_7322_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7322_cast_fp16 = slice_by_index(begin = var_7322_begin_0, end = var_7322_end_0, end_mask = var_7322_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7322_cast_fp16")];
+            tensor<int32, [4]> var_7326_begin_0 = const()[name = tensor<string, []>("op_7326_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_7326_end_0 = const()[name = tensor<string, []>("op_7326_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_7326_end_mask_0 = const()[name = tensor<string, []>("op_7326_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7326_cast_fp16 = slice_by_index(begin = var_7326_begin_0, end = var_7326_end_0, end_mask = var_7326_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7326_cast_fp16")];
+            tensor<int32, [4]> var_7330_begin_0 = const()[name = tensor<string, []>("op_7330_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_7330_end_0 = const()[name = tensor<string, []>("op_7330_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_7330_end_mask_0 = const()[name = tensor<string, []>("op_7330_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7330_cast_fp16 = slice_by_index(begin = var_7330_begin_0, end = var_7330_end_0, end_mask = var_7330_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7330_cast_fp16")];
+            tensor<int32, [4]> var_7334_begin_0 = const()[name = tensor<string, []>("op_7334_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_7334_end_0 = const()[name = tensor<string, []>("op_7334_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_7334_end_mask_0 = const()[name = tensor<string, []>("op_7334_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7334_cast_fp16 = slice_by_index(begin = var_7334_begin_0, end = var_7334_end_0, end_mask = var_7334_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7334_cast_fp16")];
+            tensor<int32, [4]> var_7338_begin_0 = const()[name = tensor<string, []>("op_7338_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_7338_end_0 = const()[name = tensor<string, []>("op_7338_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_7338_end_mask_0 = const()[name = tensor<string, []>("op_7338_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7338_cast_fp16 = slice_by_index(begin = var_7338_begin_0, end = var_7338_end_0, end_mask = var_7338_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7338_cast_fp16")];
+            tensor<int32, [4]> var_7342_begin_0 = const()[name = tensor<string, []>("op_7342_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_7342_end_0 = const()[name = tensor<string, []>("op_7342_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_7342_end_mask_0 = const()[name = tensor<string, []>("op_7342_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7342_cast_fp16 = slice_by_index(begin = var_7342_begin_0, end = var_7342_end_0, end_mask = var_7342_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7342_cast_fp16")];
+            tensor<int32, [4]> var_7346_begin_0 = const()[name = tensor<string, []>("op_7346_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_7346_end_0 = const()[name = tensor<string, []>("op_7346_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_7346_end_mask_0 = const()[name = tensor<string, []>("op_7346_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7346_cast_fp16 = slice_by_index(begin = var_7346_begin_0, end = var_7346_end_0, end_mask = var_7346_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7346_cast_fp16")];
+            tensor<int32, [4]> var_7350_begin_0 = const()[name = tensor<string, []>("op_7350_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_7350_end_0 = const()[name = tensor<string, []>("op_7350_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_7350_end_mask_0 = const()[name = tensor<string, []>("op_7350_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_7350_cast_fp16 = slice_by_index(begin = var_7350_begin_0, end = var_7350_end_0, end_mask = var_7350_end_mask_0, x = transpose_4)[name = tensor<string, []>("op_7350_cast_fp16")];
+            tensor<int32, [4]> var_7352_begin_0 = const()[name = tensor<string, []>("op_7352_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7352_end_0 = const()[name = tensor<string, []>("op_7352_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7352_end_mask_0 = const()[name = tensor<string, []>("op_7352_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7352_cast_fp16 = slice_by_index(begin = var_7352_begin_0, end = var_7352_end_0, end_mask = var_7352_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7352_cast_fp16")];
+            tensor<int32, [4]> var_7356_begin_0 = const()[name = tensor<string, []>("op_7356_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_7356_end_0 = const()[name = tensor<string, []>("op_7356_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_7356_end_mask_0 = const()[name = tensor<string, []>("op_7356_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7356_cast_fp16 = slice_by_index(begin = var_7356_begin_0, end = var_7356_end_0, end_mask = var_7356_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7356_cast_fp16")];
+            tensor<int32, [4]> var_7360_begin_0 = const()[name = tensor<string, []>("op_7360_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_7360_end_0 = const()[name = tensor<string, []>("op_7360_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_7360_end_mask_0 = const()[name = tensor<string, []>("op_7360_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7360_cast_fp16 = slice_by_index(begin = var_7360_begin_0, end = var_7360_end_0, end_mask = var_7360_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7360_cast_fp16")];
+            tensor<int32, [4]> var_7364_begin_0 = const()[name = tensor<string, []>("op_7364_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_7364_end_0 = const()[name = tensor<string, []>("op_7364_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_7364_end_mask_0 = const()[name = tensor<string, []>("op_7364_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7364_cast_fp16 = slice_by_index(begin = var_7364_begin_0, end = var_7364_end_0, end_mask = var_7364_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7364_cast_fp16")];
+            tensor<int32, [4]> var_7368_begin_0 = const()[name = tensor<string, []>("op_7368_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_7368_end_0 = const()[name = tensor<string, []>("op_7368_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_7368_end_mask_0 = const()[name = tensor<string, []>("op_7368_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7368_cast_fp16 = slice_by_index(begin = var_7368_begin_0, end = var_7368_end_0, end_mask = var_7368_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7368_cast_fp16")];
+            tensor<int32, [4]> var_7372_begin_0 = const()[name = tensor<string, []>("op_7372_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_7372_end_0 = const()[name = tensor<string, []>("op_7372_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_7372_end_mask_0 = const()[name = tensor<string, []>("op_7372_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7372_cast_fp16 = slice_by_index(begin = var_7372_begin_0, end = var_7372_end_0, end_mask = var_7372_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7372_cast_fp16")];
+            tensor<int32, [4]> var_7376_begin_0 = const()[name = tensor<string, []>("op_7376_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_7376_end_0 = const()[name = tensor<string, []>("op_7376_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_7376_end_mask_0 = const()[name = tensor<string, []>("op_7376_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7376_cast_fp16 = slice_by_index(begin = var_7376_begin_0, end = var_7376_end_0, end_mask = var_7376_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7376_cast_fp16")];
+            tensor<int32, [4]> var_7380_begin_0 = const()[name = tensor<string, []>("op_7380_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_7380_end_0 = const()[name = tensor<string, []>("op_7380_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_7380_end_mask_0 = const()[name = tensor<string, []>("op_7380_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7380_cast_fp16 = slice_by_index(begin = var_7380_begin_0, end = var_7380_end_0, end_mask = var_7380_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7380_cast_fp16")];
+            tensor<int32, [4]> var_7384_begin_0 = const()[name = tensor<string, []>("op_7384_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_7384_end_0 = const()[name = tensor<string, []>("op_7384_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_7384_end_mask_0 = const()[name = tensor<string, []>("op_7384_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7384_cast_fp16 = slice_by_index(begin = var_7384_begin_0, end = var_7384_end_0, end_mask = var_7384_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7384_cast_fp16")];
+            tensor<int32, [4]> var_7388_begin_0 = const()[name = tensor<string, []>("op_7388_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_7388_end_0 = const()[name = tensor<string, []>("op_7388_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_7388_end_mask_0 = const()[name = tensor<string, []>("op_7388_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7388_cast_fp16 = slice_by_index(begin = var_7388_begin_0, end = var_7388_end_0, end_mask = var_7388_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7388_cast_fp16")];
+            tensor<int32, [4]> var_7392_begin_0 = const()[name = tensor<string, []>("op_7392_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_7392_end_0 = const()[name = tensor<string, []>("op_7392_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_7392_end_mask_0 = const()[name = tensor<string, []>("op_7392_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7392_cast_fp16 = slice_by_index(begin = var_7392_begin_0, end = var_7392_end_0, end_mask = var_7392_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7392_cast_fp16")];
+            tensor<int32, [4]> var_7396_begin_0 = const()[name = tensor<string, []>("op_7396_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_7396_end_0 = const()[name = tensor<string, []>("op_7396_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_7396_end_mask_0 = const()[name = tensor<string, []>("op_7396_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7396_cast_fp16 = slice_by_index(begin = var_7396_begin_0, end = var_7396_end_0, end_mask = var_7396_end_mask_0, x = value_15_cast_fp16)[name = tensor<string, []>("op_7396_cast_fp16")];
+            tensor<string, []> var_7400_equation_0 = const()[name = tensor<string, []>("op_7400_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7400_cast_fp16 = einsum(equation = var_7400_equation_0, values = (var_7306_cast_fp16, var_6972_cast_fp16))[name = tensor<string, []>("op_7400_cast_fp16")];
+            tensor<fp16, []> var_7401_to_fp16 = const()[name = tensor<string, []>("op_7401_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_673_cast_fp16 = mul(x = var_7400_cast_fp16, y = var_7401_to_fp16)[name = tensor<string, []>("aw_chunk_673_cast_fp16")];
+            tensor<string, []> var_7404_equation_0 = const()[name = tensor<string, []>("op_7404_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7404_cast_fp16 = einsum(equation = var_7404_equation_0, values = (var_7306_cast_fp16, var_6979_cast_fp16))[name = tensor<string, []>("op_7404_cast_fp16")];
+            tensor<fp16, []> var_7405_to_fp16 = const()[name = tensor<string, []>("op_7405_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_675_cast_fp16 = mul(x = var_7404_cast_fp16, y = var_7405_to_fp16)[name = tensor<string, []>("aw_chunk_675_cast_fp16")];
+            tensor<string, []> var_7408_equation_0 = const()[name = tensor<string, []>("op_7408_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7408_cast_fp16 = einsum(equation = var_7408_equation_0, values = (var_7306_cast_fp16, var_6986_cast_fp16))[name = tensor<string, []>("op_7408_cast_fp16")];
+            tensor<fp16, []> var_7409_to_fp16 = const()[name = tensor<string, []>("op_7409_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_677_cast_fp16 = mul(x = var_7408_cast_fp16, y = var_7409_to_fp16)[name = tensor<string, []>("aw_chunk_677_cast_fp16")];
+            tensor<string, []> var_7412_equation_0 = const()[name = tensor<string, []>("op_7412_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7412_cast_fp16 = einsum(equation = var_7412_equation_0, values = (var_7306_cast_fp16, var_6993_cast_fp16))[name = tensor<string, []>("op_7412_cast_fp16")];
+            tensor<fp16, []> var_7413_to_fp16 = const()[name = tensor<string, []>("op_7413_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_679_cast_fp16 = mul(x = var_7412_cast_fp16, y = var_7413_to_fp16)[name = tensor<string, []>("aw_chunk_679_cast_fp16")];
+            tensor<string, []> var_7416_equation_0 = const()[name = tensor<string, []>("op_7416_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7416_cast_fp16 = einsum(equation = var_7416_equation_0, values = (var_7310_cast_fp16, var_7000_cast_fp16))[name = tensor<string, []>("op_7416_cast_fp16")];
+            tensor<fp16, []> var_7417_to_fp16 = const()[name = tensor<string, []>("op_7417_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_681_cast_fp16 = mul(x = var_7416_cast_fp16, y = var_7417_to_fp16)[name = tensor<string, []>("aw_chunk_681_cast_fp16")];
+            tensor<string, []> var_7420_equation_0 = const()[name = tensor<string, []>("op_7420_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7420_cast_fp16 = einsum(equation = var_7420_equation_0, values = (var_7310_cast_fp16, var_7007_cast_fp16))[name = tensor<string, []>("op_7420_cast_fp16")];
+            tensor<fp16, []> var_7421_to_fp16 = const()[name = tensor<string, []>("op_7421_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_683_cast_fp16 = mul(x = var_7420_cast_fp16, y = var_7421_to_fp16)[name = tensor<string, []>("aw_chunk_683_cast_fp16")];
+            tensor<string, []> var_7424_equation_0 = const()[name = tensor<string, []>("op_7424_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7424_cast_fp16 = einsum(equation = var_7424_equation_0, values = (var_7310_cast_fp16, var_7014_cast_fp16))[name = tensor<string, []>("op_7424_cast_fp16")];
+            tensor<fp16, []> var_7425_to_fp16 = const()[name = tensor<string, []>("op_7425_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_685_cast_fp16 = mul(x = var_7424_cast_fp16, y = var_7425_to_fp16)[name = tensor<string, []>("aw_chunk_685_cast_fp16")];
+            tensor<string, []> var_7428_equation_0 = const()[name = tensor<string, []>("op_7428_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7428_cast_fp16 = einsum(equation = var_7428_equation_0, values = (var_7310_cast_fp16, var_7021_cast_fp16))[name = tensor<string, []>("op_7428_cast_fp16")];
+            tensor<fp16, []> var_7429_to_fp16 = const()[name = tensor<string, []>("op_7429_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_687_cast_fp16 = mul(x = var_7428_cast_fp16, y = var_7429_to_fp16)[name = tensor<string, []>("aw_chunk_687_cast_fp16")];
+            tensor<string, []> var_7432_equation_0 = const()[name = tensor<string, []>("op_7432_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7432_cast_fp16 = einsum(equation = var_7432_equation_0, values = (var_7314_cast_fp16, var_7028_cast_fp16))[name = tensor<string, []>("op_7432_cast_fp16")];
+            tensor<fp16, []> var_7433_to_fp16 = const()[name = tensor<string, []>("op_7433_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_689_cast_fp16 = mul(x = var_7432_cast_fp16, y = var_7433_to_fp16)[name = tensor<string, []>("aw_chunk_689_cast_fp16")];
+            tensor<string, []> var_7436_equation_0 = const()[name = tensor<string, []>("op_7436_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7436_cast_fp16 = einsum(equation = var_7436_equation_0, values = (var_7314_cast_fp16, var_7035_cast_fp16))[name = tensor<string, []>("op_7436_cast_fp16")];
+            tensor<fp16, []> var_7437_to_fp16 = const()[name = tensor<string, []>("op_7437_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_691_cast_fp16 = mul(x = var_7436_cast_fp16, y = var_7437_to_fp16)[name = tensor<string, []>("aw_chunk_691_cast_fp16")];
+            tensor<string, []> var_7440_equation_0 = const()[name = tensor<string, []>("op_7440_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7440_cast_fp16 = einsum(equation = var_7440_equation_0, values = (var_7314_cast_fp16, var_7042_cast_fp16))[name = tensor<string, []>("op_7440_cast_fp16")];
+            tensor<fp16, []> var_7441_to_fp16 = const()[name = tensor<string, []>("op_7441_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_693_cast_fp16 = mul(x = var_7440_cast_fp16, y = var_7441_to_fp16)[name = tensor<string, []>("aw_chunk_693_cast_fp16")];
+            tensor<string, []> var_7444_equation_0 = const()[name = tensor<string, []>("op_7444_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7444_cast_fp16 = einsum(equation = var_7444_equation_0, values = (var_7314_cast_fp16, var_7049_cast_fp16))[name = tensor<string, []>("op_7444_cast_fp16")];
+            tensor<fp16, []> var_7445_to_fp16 = const()[name = tensor<string, []>("op_7445_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_695_cast_fp16 = mul(x = var_7444_cast_fp16, y = var_7445_to_fp16)[name = tensor<string, []>("aw_chunk_695_cast_fp16")];
+            tensor<string, []> var_7448_equation_0 = const()[name = tensor<string, []>("op_7448_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7448_cast_fp16 = einsum(equation = var_7448_equation_0, values = (var_7318_cast_fp16, var_7056_cast_fp16))[name = tensor<string, []>("op_7448_cast_fp16")];
+            tensor<fp16, []> var_7449_to_fp16 = const()[name = tensor<string, []>("op_7449_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_697_cast_fp16 = mul(x = var_7448_cast_fp16, y = var_7449_to_fp16)[name = tensor<string, []>("aw_chunk_697_cast_fp16")];
+            tensor<string, []> var_7452_equation_0 = const()[name = tensor<string, []>("op_7452_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7452_cast_fp16 = einsum(equation = var_7452_equation_0, values = (var_7318_cast_fp16, var_7063_cast_fp16))[name = tensor<string, []>("op_7452_cast_fp16")];
+            tensor<fp16, []> var_7453_to_fp16 = const()[name = tensor<string, []>("op_7453_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_699_cast_fp16 = mul(x = var_7452_cast_fp16, y = var_7453_to_fp16)[name = tensor<string, []>("aw_chunk_699_cast_fp16")];
+            tensor<string, []> var_7456_equation_0 = const()[name = tensor<string, []>("op_7456_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7456_cast_fp16 = einsum(equation = var_7456_equation_0, values = (var_7318_cast_fp16, var_7070_cast_fp16))[name = tensor<string, []>("op_7456_cast_fp16")];
+            tensor<fp16, []> var_7457_to_fp16 = const()[name = tensor<string, []>("op_7457_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_701_cast_fp16 = mul(x = var_7456_cast_fp16, y = var_7457_to_fp16)[name = tensor<string, []>("aw_chunk_701_cast_fp16")];
+            tensor<string, []> var_7460_equation_0 = const()[name = tensor<string, []>("op_7460_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7460_cast_fp16 = einsum(equation = var_7460_equation_0, values = (var_7318_cast_fp16, var_7077_cast_fp16))[name = tensor<string, []>("op_7460_cast_fp16")];
+            tensor<fp16, []> var_7461_to_fp16 = const()[name = tensor<string, []>("op_7461_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_703_cast_fp16 = mul(x = var_7460_cast_fp16, y = var_7461_to_fp16)[name = tensor<string, []>("aw_chunk_703_cast_fp16")];
+            tensor<string, []> var_7464_equation_0 = const()[name = tensor<string, []>("op_7464_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7464_cast_fp16 = einsum(equation = var_7464_equation_0, values = (var_7322_cast_fp16, var_7084_cast_fp16))[name = tensor<string, []>("op_7464_cast_fp16")];
+            tensor<fp16, []> var_7465_to_fp16 = const()[name = tensor<string, []>("op_7465_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_705_cast_fp16 = mul(x = var_7464_cast_fp16, y = var_7465_to_fp16)[name = tensor<string, []>("aw_chunk_705_cast_fp16")];
+            tensor<string, []> var_7468_equation_0 = const()[name = tensor<string, []>("op_7468_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7468_cast_fp16 = einsum(equation = var_7468_equation_0, values = (var_7322_cast_fp16, var_7091_cast_fp16))[name = tensor<string, []>("op_7468_cast_fp16")];
+            tensor<fp16, []> var_7469_to_fp16 = const()[name = tensor<string, []>("op_7469_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_707_cast_fp16 = mul(x = var_7468_cast_fp16, y = var_7469_to_fp16)[name = tensor<string, []>("aw_chunk_707_cast_fp16")];
+            tensor<string, []> var_7472_equation_0 = const()[name = tensor<string, []>("op_7472_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7472_cast_fp16 = einsum(equation = var_7472_equation_0, values = (var_7322_cast_fp16, var_7098_cast_fp16))[name = tensor<string, []>("op_7472_cast_fp16")];
+            tensor<fp16, []> var_7473_to_fp16 = const()[name = tensor<string, []>("op_7473_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_709_cast_fp16 = mul(x = var_7472_cast_fp16, y = var_7473_to_fp16)[name = tensor<string, []>("aw_chunk_709_cast_fp16")];
+            tensor<string, []> var_7476_equation_0 = const()[name = tensor<string, []>("op_7476_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7476_cast_fp16 = einsum(equation = var_7476_equation_0, values = (var_7322_cast_fp16, var_7105_cast_fp16))[name = tensor<string, []>("op_7476_cast_fp16")];
+            tensor<fp16, []> var_7477_to_fp16 = const()[name = tensor<string, []>("op_7477_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_711_cast_fp16 = mul(x = var_7476_cast_fp16, y = var_7477_to_fp16)[name = tensor<string, []>("aw_chunk_711_cast_fp16")];
+            tensor<string, []> var_7480_equation_0 = const()[name = tensor<string, []>("op_7480_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7480_cast_fp16 = einsum(equation = var_7480_equation_0, values = (var_7326_cast_fp16, var_7112_cast_fp16))[name = tensor<string, []>("op_7480_cast_fp16")];
+            tensor<fp16, []> var_7481_to_fp16 = const()[name = tensor<string, []>("op_7481_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_713_cast_fp16 = mul(x = var_7480_cast_fp16, y = var_7481_to_fp16)[name = tensor<string, []>("aw_chunk_713_cast_fp16")];
+            tensor<string, []> var_7484_equation_0 = const()[name = tensor<string, []>("op_7484_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7484_cast_fp16 = einsum(equation = var_7484_equation_0, values = (var_7326_cast_fp16, var_7119_cast_fp16))[name = tensor<string, []>("op_7484_cast_fp16")];
+            tensor<fp16, []> var_7485_to_fp16 = const()[name = tensor<string, []>("op_7485_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_715_cast_fp16 = mul(x = var_7484_cast_fp16, y = var_7485_to_fp16)[name = tensor<string, []>("aw_chunk_715_cast_fp16")];
+            tensor<string, []> var_7488_equation_0 = const()[name = tensor<string, []>("op_7488_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7488_cast_fp16 = einsum(equation = var_7488_equation_0, values = (var_7326_cast_fp16, var_7126_cast_fp16))[name = tensor<string, []>("op_7488_cast_fp16")];
+            tensor<fp16, []> var_7489_to_fp16 = const()[name = tensor<string, []>("op_7489_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_717_cast_fp16 = mul(x = var_7488_cast_fp16, y = var_7489_to_fp16)[name = tensor<string, []>("aw_chunk_717_cast_fp16")];
+            tensor<string, []> var_7492_equation_0 = const()[name = tensor<string, []>("op_7492_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7492_cast_fp16 = einsum(equation = var_7492_equation_0, values = (var_7326_cast_fp16, var_7133_cast_fp16))[name = tensor<string, []>("op_7492_cast_fp16")];
+            tensor<fp16, []> var_7493_to_fp16 = const()[name = tensor<string, []>("op_7493_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_719_cast_fp16 = mul(x = var_7492_cast_fp16, y = var_7493_to_fp16)[name = tensor<string, []>("aw_chunk_719_cast_fp16")];
+            tensor<string, []> var_7496_equation_0 = const()[name = tensor<string, []>("op_7496_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7496_cast_fp16 = einsum(equation = var_7496_equation_0, values = (var_7330_cast_fp16, var_7140_cast_fp16))[name = tensor<string, []>("op_7496_cast_fp16")];
+            tensor<fp16, []> var_7497_to_fp16 = const()[name = tensor<string, []>("op_7497_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_721_cast_fp16 = mul(x = var_7496_cast_fp16, y = var_7497_to_fp16)[name = tensor<string, []>("aw_chunk_721_cast_fp16")];
+            tensor<string, []> var_7500_equation_0 = const()[name = tensor<string, []>("op_7500_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7500_cast_fp16 = einsum(equation = var_7500_equation_0, values = (var_7330_cast_fp16, var_7147_cast_fp16))[name = tensor<string, []>("op_7500_cast_fp16")];
+            tensor<fp16, []> var_7501_to_fp16 = const()[name = tensor<string, []>("op_7501_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_723_cast_fp16 = mul(x = var_7500_cast_fp16, y = var_7501_to_fp16)[name = tensor<string, []>("aw_chunk_723_cast_fp16")];
+            tensor<string, []> var_7504_equation_0 = const()[name = tensor<string, []>("op_7504_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7504_cast_fp16 = einsum(equation = var_7504_equation_0, values = (var_7330_cast_fp16, var_7154_cast_fp16))[name = tensor<string, []>("op_7504_cast_fp16")];
+            tensor<fp16, []> var_7505_to_fp16 = const()[name = tensor<string, []>("op_7505_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_725_cast_fp16 = mul(x = var_7504_cast_fp16, y = var_7505_to_fp16)[name = tensor<string, []>("aw_chunk_725_cast_fp16")];
+            tensor<string, []> var_7508_equation_0 = const()[name = tensor<string, []>("op_7508_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7508_cast_fp16 = einsum(equation = var_7508_equation_0, values = (var_7330_cast_fp16, var_7161_cast_fp16))[name = tensor<string, []>("op_7508_cast_fp16")];
+            tensor<fp16, []> var_7509_to_fp16 = const()[name = tensor<string, []>("op_7509_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_727_cast_fp16 = mul(x = var_7508_cast_fp16, y = var_7509_to_fp16)[name = tensor<string, []>("aw_chunk_727_cast_fp16")];
+            tensor<string, []> var_7512_equation_0 = const()[name = tensor<string, []>("op_7512_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7512_cast_fp16 = einsum(equation = var_7512_equation_0, values = (var_7334_cast_fp16, var_7168_cast_fp16))[name = tensor<string, []>("op_7512_cast_fp16")];
+            tensor<fp16, []> var_7513_to_fp16 = const()[name = tensor<string, []>("op_7513_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_729_cast_fp16 = mul(x = var_7512_cast_fp16, y = var_7513_to_fp16)[name = tensor<string, []>("aw_chunk_729_cast_fp16")];
+            tensor<string, []> var_7516_equation_0 = const()[name = tensor<string, []>("op_7516_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7516_cast_fp16 = einsum(equation = var_7516_equation_0, values = (var_7334_cast_fp16, var_7175_cast_fp16))[name = tensor<string, []>("op_7516_cast_fp16")];
+            tensor<fp16, []> var_7517_to_fp16 = const()[name = tensor<string, []>("op_7517_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_731_cast_fp16 = mul(x = var_7516_cast_fp16, y = var_7517_to_fp16)[name = tensor<string, []>("aw_chunk_731_cast_fp16")];
+            tensor<string, []> var_7520_equation_0 = const()[name = tensor<string, []>("op_7520_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7520_cast_fp16 = einsum(equation = var_7520_equation_0, values = (var_7334_cast_fp16, var_7182_cast_fp16))[name = tensor<string, []>("op_7520_cast_fp16")];
+            tensor<fp16, []> var_7521_to_fp16 = const()[name = tensor<string, []>("op_7521_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_733_cast_fp16 = mul(x = var_7520_cast_fp16, y = var_7521_to_fp16)[name = tensor<string, []>("aw_chunk_733_cast_fp16")];
+            tensor<string, []> var_7524_equation_0 = const()[name = tensor<string, []>("op_7524_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7524_cast_fp16 = einsum(equation = var_7524_equation_0, values = (var_7334_cast_fp16, var_7189_cast_fp16))[name = tensor<string, []>("op_7524_cast_fp16")];
+            tensor<fp16, []> var_7525_to_fp16 = const()[name = tensor<string, []>("op_7525_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_735_cast_fp16 = mul(x = var_7524_cast_fp16, y = var_7525_to_fp16)[name = tensor<string, []>("aw_chunk_735_cast_fp16")];
+            tensor<string, []> var_7528_equation_0 = const()[name = tensor<string, []>("op_7528_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7528_cast_fp16 = einsum(equation = var_7528_equation_0, values = (var_7338_cast_fp16, var_7196_cast_fp16))[name = tensor<string, []>("op_7528_cast_fp16")];
+            tensor<fp16, []> var_7529_to_fp16 = const()[name = tensor<string, []>("op_7529_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_737_cast_fp16 = mul(x = var_7528_cast_fp16, y = var_7529_to_fp16)[name = tensor<string, []>("aw_chunk_737_cast_fp16")];
+            tensor<string, []> var_7532_equation_0 = const()[name = tensor<string, []>("op_7532_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7532_cast_fp16 = einsum(equation = var_7532_equation_0, values = (var_7338_cast_fp16, var_7203_cast_fp16))[name = tensor<string, []>("op_7532_cast_fp16")];
+            tensor<fp16, []> var_7533_to_fp16 = const()[name = tensor<string, []>("op_7533_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_739_cast_fp16 = mul(x = var_7532_cast_fp16, y = var_7533_to_fp16)[name = tensor<string, []>("aw_chunk_739_cast_fp16")];
+            tensor<string, []> var_7536_equation_0 = const()[name = tensor<string, []>("op_7536_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7536_cast_fp16 = einsum(equation = var_7536_equation_0, values = (var_7338_cast_fp16, var_7210_cast_fp16))[name = tensor<string, []>("op_7536_cast_fp16")];
+            tensor<fp16, []> var_7537_to_fp16 = const()[name = tensor<string, []>("op_7537_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_741_cast_fp16 = mul(x = var_7536_cast_fp16, y = var_7537_to_fp16)[name = tensor<string, []>("aw_chunk_741_cast_fp16")];
+            tensor<string, []> var_7540_equation_0 = const()[name = tensor<string, []>("op_7540_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7540_cast_fp16 = einsum(equation = var_7540_equation_0, values = (var_7338_cast_fp16, var_7217_cast_fp16))[name = tensor<string, []>("op_7540_cast_fp16")];
+            tensor<fp16, []> var_7541_to_fp16 = const()[name = tensor<string, []>("op_7541_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_743_cast_fp16 = mul(x = var_7540_cast_fp16, y = var_7541_to_fp16)[name = tensor<string, []>("aw_chunk_743_cast_fp16")];
+            tensor<string, []> var_7544_equation_0 = const()[name = tensor<string, []>("op_7544_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7544_cast_fp16 = einsum(equation = var_7544_equation_0, values = (var_7342_cast_fp16, var_7224_cast_fp16))[name = tensor<string, []>("op_7544_cast_fp16")];
+            tensor<fp16, []> var_7545_to_fp16 = const()[name = tensor<string, []>("op_7545_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_745_cast_fp16 = mul(x = var_7544_cast_fp16, y = var_7545_to_fp16)[name = tensor<string, []>("aw_chunk_745_cast_fp16")];
+            tensor<string, []> var_7548_equation_0 = const()[name = tensor<string, []>("op_7548_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7548_cast_fp16 = einsum(equation = var_7548_equation_0, values = (var_7342_cast_fp16, var_7231_cast_fp16))[name = tensor<string, []>("op_7548_cast_fp16")];
+            tensor<fp16, []> var_7549_to_fp16 = const()[name = tensor<string, []>("op_7549_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_747_cast_fp16 = mul(x = var_7548_cast_fp16, y = var_7549_to_fp16)[name = tensor<string, []>("aw_chunk_747_cast_fp16")];
+            tensor<string, []> var_7552_equation_0 = const()[name = tensor<string, []>("op_7552_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7552_cast_fp16 = einsum(equation = var_7552_equation_0, values = (var_7342_cast_fp16, var_7238_cast_fp16))[name = tensor<string, []>("op_7552_cast_fp16")];
+            tensor<fp16, []> var_7553_to_fp16 = const()[name = tensor<string, []>("op_7553_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_749_cast_fp16 = mul(x = var_7552_cast_fp16, y = var_7553_to_fp16)[name = tensor<string, []>("aw_chunk_749_cast_fp16")];
+            tensor<string, []> var_7556_equation_0 = const()[name = tensor<string, []>("op_7556_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7556_cast_fp16 = einsum(equation = var_7556_equation_0, values = (var_7342_cast_fp16, var_7245_cast_fp16))[name = tensor<string, []>("op_7556_cast_fp16")];
+            tensor<fp16, []> var_7557_to_fp16 = const()[name = tensor<string, []>("op_7557_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_751_cast_fp16 = mul(x = var_7556_cast_fp16, y = var_7557_to_fp16)[name = tensor<string, []>("aw_chunk_751_cast_fp16")];
+            tensor<string, []> var_7560_equation_0 = const()[name = tensor<string, []>("op_7560_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7560_cast_fp16 = einsum(equation = var_7560_equation_0, values = (var_7346_cast_fp16, var_7252_cast_fp16))[name = tensor<string, []>("op_7560_cast_fp16")];
+            tensor<fp16, []> var_7561_to_fp16 = const()[name = tensor<string, []>("op_7561_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_753_cast_fp16 = mul(x = var_7560_cast_fp16, y = var_7561_to_fp16)[name = tensor<string, []>("aw_chunk_753_cast_fp16")];
+            tensor<string, []> var_7564_equation_0 = const()[name = tensor<string, []>("op_7564_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7564_cast_fp16 = einsum(equation = var_7564_equation_0, values = (var_7346_cast_fp16, var_7259_cast_fp16))[name = tensor<string, []>("op_7564_cast_fp16")];
+            tensor<fp16, []> var_7565_to_fp16 = const()[name = tensor<string, []>("op_7565_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_755_cast_fp16 = mul(x = var_7564_cast_fp16, y = var_7565_to_fp16)[name = tensor<string, []>("aw_chunk_755_cast_fp16")];
+            tensor<string, []> var_7568_equation_0 = const()[name = tensor<string, []>("op_7568_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7568_cast_fp16 = einsum(equation = var_7568_equation_0, values = (var_7346_cast_fp16, var_7266_cast_fp16))[name = tensor<string, []>("op_7568_cast_fp16")];
+            tensor<fp16, []> var_7569_to_fp16 = const()[name = tensor<string, []>("op_7569_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_757_cast_fp16 = mul(x = var_7568_cast_fp16, y = var_7569_to_fp16)[name = tensor<string, []>("aw_chunk_757_cast_fp16")];
+            tensor<string, []> var_7572_equation_0 = const()[name = tensor<string, []>("op_7572_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7572_cast_fp16 = einsum(equation = var_7572_equation_0, values = (var_7346_cast_fp16, var_7273_cast_fp16))[name = tensor<string, []>("op_7572_cast_fp16")];
+            tensor<fp16, []> var_7573_to_fp16 = const()[name = tensor<string, []>("op_7573_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_759_cast_fp16 = mul(x = var_7572_cast_fp16, y = var_7573_to_fp16)[name = tensor<string, []>("aw_chunk_759_cast_fp16")];
+            tensor<string, []> var_7576_equation_0 = const()[name = tensor<string, []>("op_7576_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7576_cast_fp16 = einsum(equation = var_7576_equation_0, values = (var_7350_cast_fp16, var_7280_cast_fp16))[name = tensor<string, []>("op_7576_cast_fp16")];
+            tensor<fp16, []> var_7577_to_fp16 = const()[name = tensor<string, []>("op_7577_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_761_cast_fp16 = mul(x = var_7576_cast_fp16, y = var_7577_to_fp16)[name = tensor<string, []>("aw_chunk_761_cast_fp16")];
+            tensor<string, []> var_7580_equation_0 = const()[name = tensor<string, []>("op_7580_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7580_cast_fp16 = einsum(equation = var_7580_equation_0, values = (var_7350_cast_fp16, var_7287_cast_fp16))[name = tensor<string, []>("op_7580_cast_fp16")];
+            tensor<fp16, []> var_7581_to_fp16 = const()[name = tensor<string, []>("op_7581_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_763_cast_fp16 = mul(x = var_7580_cast_fp16, y = var_7581_to_fp16)[name = tensor<string, []>("aw_chunk_763_cast_fp16")];
+            tensor<string, []> var_7584_equation_0 = const()[name = tensor<string, []>("op_7584_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7584_cast_fp16 = einsum(equation = var_7584_equation_0, values = (var_7350_cast_fp16, var_7294_cast_fp16))[name = tensor<string, []>("op_7584_cast_fp16")];
+            tensor<fp16, []> var_7585_to_fp16 = const()[name = tensor<string, []>("op_7585_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_765_cast_fp16 = mul(x = var_7584_cast_fp16, y = var_7585_to_fp16)[name = tensor<string, []>("aw_chunk_765_cast_fp16")];
+            tensor<string, []> var_7588_equation_0 = const()[name = tensor<string, []>("op_7588_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7588_cast_fp16 = einsum(equation = var_7588_equation_0, values = (var_7350_cast_fp16, var_7301_cast_fp16))[name = tensor<string, []>("op_7588_cast_fp16")];
+            tensor<fp16, []> var_7589_to_fp16 = const()[name = tensor<string, []>("op_7589_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_767_cast_fp16 = mul(x = var_7588_cast_fp16, y = var_7589_to_fp16)[name = tensor<string, []>("aw_chunk_767_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7591_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_673_cast_fp16)[name = tensor<string, []>("op_7591_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7592_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_675_cast_fp16)[name = tensor<string, []>("op_7592_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7593_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_677_cast_fp16)[name = tensor<string, []>("op_7593_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7594_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_679_cast_fp16)[name = tensor<string, []>("op_7594_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7595_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_681_cast_fp16)[name = tensor<string, []>("op_7595_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7596_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_683_cast_fp16)[name = tensor<string, []>("op_7596_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7597_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_685_cast_fp16)[name = tensor<string, []>("op_7597_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7598_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_687_cast_fp16)[name = tensor<string, []>("op_7598_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7599_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_689_cast_fp16)[name = tensor<string, []>("op_7599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7600_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_691_cast_fp16)[name = tensor<string, []>("op_7600_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7601_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_693_cast_fp16)[name = tensor<string, []>("op_7601_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7602_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_695_cast_fp16)[name = tensor<string, []>("op_7602_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7603_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_697_cast_fp16)[name = tensor<string, []>("op_7603_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7604_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_699_cast_fp16)[name = tensor<string, []>("op_7604_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7605_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_701_cast_fp16)[name = tensor<string, []>("op_7605_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7606_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_703_cast_fp16)[name = tensor<string, []>("op_7606_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7607_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_705_cast_fp16)[name = tensor<string, []>("op_7607_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7608_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_707_cast_fp16)[name = tensor<string, []>("op_7608_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7609_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_709_cast_fp16)[name = tensor<string, []>("op_7609_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7610_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_711_cast_fp16)[name = tensor<string, []>("op_7610_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7611_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_713_cast_fp16)[name = tensor<string, []>("op_7611_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7612_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_715_cast_fp16)[name = tensor<string, []>("op_7612_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7613_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_717_cast_fp16)[name = tensor<string, []>("op_7613_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7614_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_719_cast_fp16)[name = tensor<string, []>("op_7614_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7615_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_721_cast_fp16)[name = tensor<string, []>("op_7615_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7616_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_723_cast_fp16)[name = tensor<string, []>("op_7616_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7617_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_725_cast_fp16)[name = tensor<string, []>("op_7617_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7618_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_727_cast_fp16)[name = tensor<string, []>("op_7618_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7619_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_729_cast_fp16)[name = tensor<string, []>("op_7619_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7620_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_731_cast_fp16)[name = tensor<string, []>("op_7620_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7621_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_733_cast_fp16)[name = tensor<string, []>("op_7621_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7622_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_735_cast_fp16)[name = tensor<string, []>("op_7622_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7623_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_737_cast_fp16)[name = tensor<string, []>("op_7623_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7624_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_739_cast_fp16)[name = tensor<string, []>("op_7624_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7625_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_741_cast_fp16)[name = tensor<string, []>("op_7625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7626_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_743_cast_fp16)[name = tensor<string, []>("op_7626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7627_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_745_cast_fp16)[name = tensor<string, []>("op_7627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7628_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_747_cast_fp16)[name = tensor<string, []>("op_7628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7629_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_749_cast_fp16)[name = tensor<string, []>("op_7629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7630_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_751_cast_fp16)[name = tensor<string, []>("op_7630_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7631_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_753_cast_fp16)[name = tensor<string, []>("op_7631_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7632_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_755_cast_fp16)[name = tensor<string, []>("op_7632_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7633_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_757_cast_fp16)[name = tensor<string, []>("op_7633_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7634_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_759_cast_fp16)[name = tensor<string, []>("op_7634_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7635_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_761_cast_fp16)[name = tensor<string, []>("op_7635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7636_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_763_cast_fp16)[name = tensor<string, []>("op_7636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7637_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_765_cast_fp16)[name = tensor<string, []>("op_7637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_7638_cast_fp16 = softmax(axis = var_6864, x = aw_chunk_767_cast_fp16)[name = tensor<string, []>("op_7638_cast_fp16")];
+            tensor<string, []> var_7640_equation_0 = const()[name = tensor<string, []>("op_7640_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7640_cast_fp16 = einsum(equation = var_7640_equation_0, values = (var_7352_cast_fp16, var_7591_cast_fp16))[name = tensor<string, []>("op_7640_cast_fp16")];
+            tensor<string, []> var_7642_equation_0 = const()[name = tensor<string, []>("op_7642_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7642_cast_fp16 = einsum(equation = var_7642_equation_0, values = (var_7352_cast_fp16, var_7592_cast_fp16))[name = tensor<string, []>("op_7642_cast_fp16")];
+            tensor<string, []> var_7644_equation_0 = const()[name = tensor<string, []>("op_7644_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7644_cast_fp16 = einsum(equation = var_7644_equation_0, values = (var_7352_cast_fp16, var_7593_cast_fp16))[name = tensor<string, []>("op_7644_cast_fp16")];
+            tensor<string, []> var_7646_equation_0 = const()[name = tensor<string, []>("op_7646_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7646_cast_fp16 = einsum(equation = var_7646_equation_0, values = (var_7352_cast_fp16, var_7594_cast_fp16))[name = tensor<string, []>("op_7646_cast_fp16")];
+            tensor<string, []> var_7648_equation_0 = const()[name = tensor<string, []>("op_7648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7648_cast_fp16 = einsum(equation = var_7648_equation_0, values = (var_7356_cast_fp16, var_7595_cast_fp16))[name = tensor<string, []>("op_7648_cast_fp16")];
+            tensor<string, []> var_7650_equation_0 = const()[name = tensor<string, []>("op_7650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7650_cast_fp16 = einsum(equation = var_7650_equation_0, values = (var_7356_cast_fp16, var_7596_cast_fp16))[name = tensor<string, []>("op_7650_cast_fp16")];
+            tensor<string, []> var_7652_equation_0 = const()[name = tensor<string, []>("op_7652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7652_cast_fp16 = einsum(equation = var_7652_equation_0, values = (var_7356_cast_fp16, var_7597_cast_fp16))[name = tensor<string, []>("op_7652_cast_fp16")];
+            tensor<string, []> var_7654_equation_0 = const()[name = tensor<string, []>("op_7654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7654_cast_fp16 = einsum(equation = var_7654_equation_0, values = (var_7356_cast_fp16, var_7598_cast_fp16))[name = tensor<string, []>("op_7654_cast_fp16")];
+            tensor<string, []> var_7656_equation_0 = const()[name = tensor<string, []>("op_7656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7656_cast_fp16 = einsum(equation = var_7656_equation_0, values = (var_7360_cast_fp16, var_7599_cast_fp16))[name = tensor<string, []>("op_7656_cast_fp16")];
+            tensor<string, []> var_7658_equation_0 = const()[name = tensor<string, []>("op_7658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7658_cast_fp16 = einsum(equation = var_7658_equation_0, values = (var_7360_cast_fp16, var_7600_cast_fp16))[name = tensor<string, []>("op_7658_cast_fp16")];
+            tensor<string, []> var_7660_equation_0 = const()[name = tensor<string, []>("op_7660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7660_cast_fp16 = einsum(equation = var_7660_equation_0, values = (var_7360_cast_fp16, var_7601_cast_fp16))[name = tensor<string, []>("op_7660_cast_fp16")];
+            tensor<string, []> var_7662_equation_0 = const()[name = tensor<string, []>("op_7662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7662_cast_fp16 = einsum(equation = var_7662_equation_0, values = (var_7360_cast_fp16, var_7602_cast_fp16))[name = tensor<string, []>("op_7662_cast_fp16")];
+            tensor<string, []> var_7664_equation_0 = const()[name = tensor<string, []>("op_7664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7664_cast_fp16 = einsum(equation = var_7664_equation_0, values = (var_7364_cast_fp16, var_7603_cast_fp16))[name = tensor<string, []>("op_7664_cast_fp16")];
+            tensor<string, []> var_7666_equation_0 = const()[name = tensor<string, []>("op_7666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7666_cast_fp16 = einsum(equation = var_7666_equation_0, values = (var_7364_cast_fp16, var_7604_cast_fp16))[name = tensor<string, []>("op_7666_cast_fp16")];
+            tensor<string, []> var_7668_equation_0 = const()[name = tensor<string, []>("op_7668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7668_cast_fp16 = einsum(equation = var_7668_equation_0, values = (var_7364_cast_fp16, var_7605_cast_fp16))[name = tensor<string, []>("op_7668_cast_fp16")];
+            tensor<string, []> var_7670_equation_0 = const()[name = tensor<string, []>("op_7670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7670_cast_fp16 = einsum(equation = var_7670_equation_0, values = (var_7364_cast_fp16, var_7606_cast_fp16))[name = tensor<string, []>("op_7670_cast_fp16")];
+            tensor<string, []> var_7672_equation_0 = const()[name = tensor<string, []>("op_7672_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7672_cast_fp16 = einsum(equation = var_7672_equation_0, values = (var_7368_cast_fp16, var_7607_cast_fp16))[name = tensor<string, []>("op_7672_cast_fp16")];
+            tensor<string, []> var_7674_equation_0 = const()[name = tensor<string, []>("op_7674_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7674_cast_fp16 = einsum(equation = var_7674_equation_0, values = (var_7368_cast_fp16, var_7608_cast_fp16))[name = tensor<string, []>("op_7674_cast_fp16")];
+            tensor<string, []> var_7676_equation_0 = const()[name = tensor<string, []>("op_7676_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7676_cast_fp16 = einsum(equation = var_7676_equation_0, values = (var_7368_cast_fp16, var_7609_cast_fp16))[name = tensor<string, []>("op_7676_cast_fp16")];
+            tensor<string, []> var_7678_equation_0 = const()[name = tensor<string, []>("op_7678_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7678_cast_fp16 = einsum(equation = var_7678_equation_0, values = (var_7368_cast_fp16, var_7610_cast_fp16))[name = tensor<string, []>("op_7678_cast_fp16")];
+            tensor<string, []> var_7680_equation_0 = const()[name = tensor<string, []>("op_7680_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7680_cast_fp16 = einsum(equation = var_7680_equation_0, values = (var_7372_cast_fp16, var_7611_cast_fp16))[name = tensor<string, []>("op_7680_cast_fp16")];
+            tensor<string, []> var_7682_equation_0 = const()[name = tensor<string, []>("op_7682_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7682_cast_fp16 = einsum(equation = var_7682_equation_0, values = (var_7372_cast_fp16, var_7612_cast_fp16))[name = tensor<string, []>("op_7682_cast_fp16")];
+            tensor<string, []> var_7684_equation_0 = const()[name = tensor<string, []>("op_7684_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7684_cast_fp16 = einsum(equation = var_7684_equation_0, values = (var_7372_cast_fp16, var_7613_cast_fp16))[name = tensor<string, []>("op_7684_cast_fp16")];
+            tensor<string, []> var_7686_equation_0 = const()[name = tensor<string, []>("op_7686_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7686_cast_fp16 = einsum(equation = var_7686_equation_0, values = (var_7372_cast_fp16, var_7614_cast_fp16))[name = tensor<string, []>("op_7686_cast_fp16")];
+            tensor<string, []> var_7688_equation_0 = const()[name = tensor<string, []>("op_7688_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7688_cast_fp16 = einsum(equation = var_7688_equation_0, values = (var_7376_cast_fp16, var_7615_cast_fp16))[name = tensor<string, []>("op_7688_cast_fp16")];
+            tensor<string, []> var_7690_equation_0 = const()[name = tensor<string, []>("op_7690_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7690_cast_fp16 = einsum(equation = var_7690_equation_0, values = (var_7376_cast_fp16, var_7616_cast_fp16))[name = tensor<string, []>("op_7690_cast_fp16")];
+            tensor<string, []> var_7692_equation_0 = const()[name = tensor<string, []>("op_7692_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7692_cast_fp16 = einsum(equation = var_7692_equation_0, values = (var_7376_cast_fp16, var_7617_cast_fp16))[name = tensor<string, []>("op_7692_cast_fp16")];
+            tensor<string, []> var_7694_equation_0 = const()[name = tensor<string, []>("op_7694_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7694_cast_fp16 = einsum(equation = var_7694_equation_0, values = (var_7376_cast_fp16, var_7618_cast_fp16))[name = tensor<string, []>("op_7694_cast_fp16")];
+            tensor<string, []> var_7696_equation_0 = const()[name = tensor<string, []>("op_7696_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7696_cast_fp16 = einsum(equation = var_7696_equation_0, values = (var_7380_cast_fp16, var_7619_cast_fp16))[name = tensor<string, []>("op_7696_cast_fp16")];
+            tensor<string, []> var_7698_equation_0 = const()[name = tensor<string, []>("op_7698_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7698_cast_fp16 = einsum(equation = var_7698_equation_0, values = (var_7380_cast_fp16, var_7620_cast_fp16))[name = tensor<string, []>("op_7698_cast_fp16")];
+            tensor<string, []> var_7700_equation_0 = const()[name = tensor<string, []>("op_7700_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7700_cast_fp16 = einsum(equation = var_7700_equation_0, values = (var_7380_cast_fp16, var_7621_cast_fp16))[name = tensor<string, []>("op_7700_cast_fp16")];
+            tensor<string, []> var_7702_equation_0 = const()[name = tensor<string, []>("op_7702_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7702_cast_fp16 = einsum(equation = var_7702_equation_0, values = (var_7380_cast_fp16, var_7622_cast_fp16))[name = tensor<string, []>("op_7702_cast_fp16")];
+            tensor<string, []> var_7704_equation_0 = const()[name = tensor<string, []>("op_7704_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7704_cast_fp16 = einsum(equation = var_7704_equation_0, values = (var_7384_cast_fp16, var_7623_cast_fp16))[name = tensor<string, []>("op_7704_cast_fp16")];
+            tensor<string, []> var_7706_equation_0 = const()[name = tensor<string, []>("op_7706_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7706_cast_fp16 = einsum(equation = var_7706_equation_0, values = (var_7384_cast_fp16, var_7624_cast_fp16))[name = tensor<string, []>("op_7706_cast_fp16")];
+            tensor<string, []> var_7708_equation_0 = const()[name = tensor<string, []>("op_7708_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7708_cast_fp16 = einsum(equation = var_7708_equation_0, values = (var_7384_cast_fp16, var_7625_cast_fp16))[name = tensor<string, []>("op_7708_cast_fp16")];
+            tensor<string, []> var_7710_equation_0 = const()[name = tensor<string, []>("op_7710_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7710_cast_fp16 = einsum(equation = var_7710_equation_0, values = (var_7384_cast_fp16, var_7626_cast_fp16))[name = tensor<string, []>("op_7710_cast_fp16")];
+            tensor<string, []> var_7712_equation_0 = const()[name = tensor<string, []>("op_7712_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7712_cast_fp16 = einsum(equation = var_7712_equation_0, values = (var_7388_cast_fp16, var_7627_cast_fp16))[name = tensor<string, []>("op_7712_cast_fp16")];
+            tensor<string, []> var_7714_equation_0 = const()[name = tensor<string, []>("op_7714_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7714_cast_fp16 = einsum(equation = var_7714_equation_0, values = (var_7388_cast_fp16, var_7628_cast_fp16))[name = tensor<string, []>("op_7714_cast_fp16")];
+            tensor<string, []> var_7716_equation_0 = const()[name = tensor<string, []>("op_7716_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7716_cast_fp16 = einsum(equation = var_7716_equation_0, values = (var_7388_cast_fp16, var_7629_cast_fp16))[name = tensor<string, []>("op_7716_cast_fp16")];
+            tensor<string, []> var_7718_equation_0 = const()[name = tensor<string, []>("op_7718_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7718_cast_fp16 = einsum(equation = var_7718_equation_0, values = (var_7388_cast_fp16, var_7630_cast_fp16))[name = tensor<string, []>("op_7718_cast_fp16")];
+            tensor<string, []> var_7720_equation_0 = const()[name = tensor<string, []>("op_7720_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7720_cast_fp16 = einsum(equation = var_7720_equation_0, values = (var_7392_cast_fp16, var_7631_cast_fp16))[name = tensor<string, []>("op_7720_cast_fp16")];
+            tensor<string, []> var_7722_equation_0 = const()[name = tensor<string, []>("op_7722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7722_cast_fp16 = einsum(equation = var_7722_equation_0, values = (var_7392_cast_fp16, var_7632_cast_fp16))[name = tensor<string, []>("op_7722_cast_fp16")];
+            tensor<string, []> var_7724_equation_0 = const()[name = tensor<string, []>("op_7724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7724_cast_fp16 = einsum(equation = var_7724_equation_0, values = (var_7392_cast_fp16, var_7633_cast_fp16))[name = tensor<string, []>("op_7724_cast_fp16")];
+            tensor<string, []> var_7726_equation_0 = const()[name = tensor<string, []>("op_7726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7726_cast_fp16 = einsum(equation = var_7726_equation_0, values = (var_7392_cast_fp16, var_7634_cast_fp16))[name = tensor<string, []>("op_7726_cast_fp16")];
+            tensor<string, []> var_7728_equation_0 = const()[name = tensor<string, []>("op_7728_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7728_cast_fp16 = einsum(equation = var_7728_equation_0, values = (var_7396_cast_fp16, var_7635_cast_fp16))[name = tensor<string, []>("op_7728_cast_fp16")];
+            tensor<string, []> var_7730_equation_0 = const()[name = tensor<string, []>("op_7730_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7730_cast_fp16 = einsum(equation = var_7730_equation_0, values = (var_7396_cast_fp16, var_7636_cast_fp16))[name = tensor<string, []>("op_7730_cast_fp16")];
+            tensor<string, []> var_7732_equation_0 = const()[name = tensor<string, []>("op_7732_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7732_cast_fp16 = einsum(equation = var_7732_equation_0, values = (var_7396_cast_fp16, var_7637_cast_fp16))[name = tensor<string, []>("op_7732_cast_fp16")];
+            tensor<string, []> var_7734_equation_0 = const()[name = tensor<string, []>("op_7734_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_7734_cast_fp16 = einsum(equation = var_7734_equation_0, values = (var_7396_cast_fp16, var_7638_cast_fp16))[name = tensor<string, []>("op_7734_cast_fp16")];
+            tensor<bool, []> var_7736_interleave_0 = const()[name = tensor<string, []>("op_7736_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7736_cast_fp16 = concat(axis = var_6847, interleave = var_7736_interleave_0, values = (var_7640_cast_fp16, var_7642_cast_fp16, var_7644_cast_fp16, var_7646_cast_fp16))[name = tensor<string, []>("op_7736_cast_fp16")];
+            tensor<bool, []> var_7738_interleave_0 = const()[name = tensor<string, []>("op_7738_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7738_cast_fp16 = concat(axis = var_6847, interleave = var_7738_interleave_0, values = (var_7648_cast_fp16, var_7650_cast_fp16, var_7652_cast_fp16, var_7654_cast_fp16))[name = tensor<string, []>("op_7738_cast_fp16")];
+            tensor<bool, []> var_7740_interleave_0 = const()[name = tensor<string, []>("op_7740_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7740_cast_fp16 = concat(axis = var_6847, interleave = var_7740_interleave_0, values = (var_7656_cast_fp16, var_7658_cast_fp16, var_7660_cast_fp16, var_7662_cast_fp16))[name = tensor<string, []>("op_7740_cast_fp16")];
+            tensor<bool, []> var_7742_interleave_0 = const()[name = tensor<string, []>("op_7742_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7742_cast_fp16 = concat(axis = var_6847, interleave = var_7742_interleave_0, values = (var_7664_cast_fp16, var_7666_cast_fp16, var_7668_cast_fp16, var_7670_cast_fp16))[name = tensor<string, []>("op_7742_cast_fp16")];
+            tensor<bool, []> var_7744_interleave_0 = const()[name = tensor<string, []>("op_7744_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7744_cast_fp16 = concat(axis = var_6847, interleave = var_7744_interleave_0, values = (var_7672_cast_fp16, var_7674_cast_fp16, var_7676_cast_fp16, var_7678_cast_fp16))[name = tensor<string, []>("op_7744_cast_fp16")];
+            tensor<bool, []> var_7746_interleave_0 = const()[name = tensor<string, []>("op_7746_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7746_cast_fp16 = concat(axis = var_6847, interleave = var_7746_interleave_0, values = (var_7680_cast_fp16, var_7682_cast_fp16, var_7684_cast_fp16, var_7686_cast_fp16))[name = tensor<string, []>("op_7746_cast_fp16")];
+            tensor<bool, []> var_7748_interleave_0 = const()[name = tensor<string, []>("op_7748_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7748_cast_fp16 = concat(axis = var_6847, interleave = var_7748_interleave_0, values = (var_7688_cast_fp16, var_7690_cast_fp16, var_7692_cast_fp16, var_7694_cast_fp16))[name = tensor<string, []>("op_7748_cast_fp16")];
+            tensor<bool, []> var_7750_interleave_0 = const()[name = tensor<string, []>("op_7750_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7750_cast_fp16 = concat(axis = var_6847, interleave = var_7750_interleave_0, values = (var_7696_cast_fp16, var_7698_cast_fp16, var_7700_cast_fp16, var_7702_cast_fp16))[name = tensor<string, []>("op_7750_cast_fp16")];
+            tensor<bool, []> var_7752_interleave_0 = const()[name = tensor<string, []>("op_7752_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7752_cast_fp16 = concat(axis = var_6847, interleave = var_7752_interleave_0, values = (var_7704_cast_fp16, var_7706_cast_fp16, var_7708_cast_fp16, var_7710_cast_fp16))[name = tensor<string, []>("op_7752_cast_fp16")];
+            tensor<bool, []> var_7754_interleave_0 = const()[name = tensor<string, []>("op_7754_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7754_cast_fp16 = concat(axis = var_6847, interleave = var_7754_interleave_0, values = (var_7712_cast_fp16, var_7714_cast_fp16, var_7716_cast_fp16, var_7718_cast_fp16))[name = tensor<string, []>("op_7754_cast_fp16")];
+            tensor<bool, []> var_7756_interleave_0 = const()[name = tensor<string, []>("op_7756_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7756_cast_fp16 = concat(axis = var_6847, interleave = var_7756_interleave_0, values = (var_7720_cast_fp16, var_7722_cast_fp16, var_7724_cast_fp16, var_7726_cast_fp16))[name = tensor<string, []>("op_7756_cast_fp16")];
+            tensor<bool, []> var_7758_interleave_0 = const()[name = tensor<string, []>("op_7758_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7758_cast_fp16 = concat(axis = var_6847, interleave = var_7758_interleave_0, values = (var_7728_cast_fp16, var_7730_cast_fp16, var_7732_cast_fp16, var_7734_cast_fp16))[name = tensor<string, []>("op_7758_cast_fp16")];
+            tensor<bool, []> input_57_interleave_0 = const()[name = tensor<string, []>("input_57_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_57_cast_fp16 = concat(axis = var_6864, interleave = input_57_interleave_0, values = (var_7736_cast_fp16, var_7738_cast_fp16, var_7740_cast_fp16, var_7742_cast_fp16, var_7744_cast_fp16, var_7746_cast_fp16, var_7748_cast_fp16, var_7750_cast_fp16, var_7752_cast_fp16, var_7754_cast_fp16, var_7756_cast_fp16, var_7758_cast_fp16))[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<int32, [2]> var_7763 = const()[name = tensor<string, []>("op_7763"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_7765 = const()[name = tensor<string, []>("op_7765"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_31_pad_type_0 = const()[name = tensor<string, []>("obj_31_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_31_pad_0 = const()[name = tensor<string, []>("obj_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108989952)))];
+            tensor<fp16, [768]> layers_7_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110169664)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_31_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_bias_to_fp16, dilations = var_7765, groups = var_6864, pad = obj_31_pad_0, pad_type = obj_31_pad_type_0, strides = var_7763, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("obj_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, [1]> var_7771 = const()[name = tensor<string, []>("op_7771"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_31_cast_fp16 = reduce_mean(axes = var_7771, keep_dims = var_6865, x = inputs_31_cast_fp16)[name = tensor<string, []>("channels_mean_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_31_cast_fp16 = sub(x = inputs_31_cast_fp16, y = channels_mean_31_cast_fp16)[name = tensor<string, []>("zero_mean_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = zero_mean_31_cast_fp16)[name = tensor<string, []>("zero_mean_sq_31_cast_fp16")];
+            tensor<int32, [1]> var_7775 = const()[name = tensor<string, []>("op_7775"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_7776_cast_fp16 = reduce_mean(axes = var_7775, keep_dims = var_6865, x = zero_mean_sq_31_cast_fp16)[name = tensor<string, []>("op_7776_cast_fp16")];
+            tensor<fp16, []> var_7777_to_fp16 = const()[name = tensor<string, []>("op_7777_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_7778_cast_fp16 = add(x = var_7776_cast_fp16, y = var_7777_to_fp16)[name = tensor<string, []>("op_7778_cast_fp16")];
+            tensor<fp16, []> denom_31_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_31_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0_to_fp16, x = var_7778_cast_fp16)[name = tensor<string, []>("denom_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = denom_31_cast_fp16)[name = tensor<string, []>("out_31_cast_fp16")];
+            tensor<fp16, [768]> input_59_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_59_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110171264)))];
+            tensor<fp16, [768]> input_59_beta_0_to_fp16 = const()[name = tensor<string, []>("input_59_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110172864)))];
+            tensor<fp16, []> input_59_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_59_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<int32, [2]> var_7789 = const()[name = tensor<string, []>("op_7789"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_7791 = const()[name = tensor<string, []>("op_7791"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_61_pad_type_0 = const()[name = tensor<string, []>("input_61_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_61_pad_0 = const()[name = tensor<string, []>("input_61_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110174464)))];
+            tensor<fp16, [3072]> layers_7_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114893120)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_61_cast_fp16 = conv(bias = layers_7_fc1_bias_to_fp16, dilations = var_7791, groups = var_6864, pad = input_61_pad_0, pad_type = input_61_pad_type_0, strides = var_7789, weight = layers_7_fc1_weight_to_fp16, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<string, []> input_63_mode_0 = const()[name = tensor<string, []>("input_63_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<int32, [2]> var_7797 = const()[name = tensor<string, []>("op_7797"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_7799 = const()[name = tensor<string, []>("op_7799"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_19_pad_type_0 = const()[name = tensor<string, []>("hidden_states_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_19_pad_0 = const()[name = tensor<string, []>("hidden_states_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114899328)))];
+            tensor<fp16, [768]> layers_7_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119617984)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_19_cast_fp16 = conv(bias = layers_7_fc2_bias_to_fp16, dilations = var_7799, groups = var_6864, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = var_7797, weight = layers_7_fc2_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, []> var_7806 = const()[name = tensor<string, []>("op_7806"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_7823 = const()[name = tensor<string, []>("op_7823"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_7824 = const()[name = tensor<string, []>("op_7824"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_7834 = const()[name = tensor<string, []>("op_7834"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_33_cast_fp16 = reduce_mean(axes = var_7834, keep_dims = var_7824, x = inputs_33_cast_fp16)[name = tensor<string, []>("channels_mean_33_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_33_cast_fp16 = sub(x = inputs_33_cast_fp16, y = channels_mean_33_cast_fp16)[name = tensor<string, []>("zero_mean_33_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = zero_mean_33_cast_fp16)[name = tensor<string, []>("zero_mean_sq_33_cast_fp16")];
+            tensor<int32, [1]> var_7838 = const()[name = tensor<string, []>("op_7838"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_7839_cast_fp16 = reduce_mean(axes = var_7838, keep_dims = var_7824, x = zero_mean_sq_33_cast_fp16)[name = tensor<string, []>("op_7839_cast_fp16")];
+            tensor<fp16, []> var_7840_to_fp16 = const()[name = tensor<string, []>("op_7840_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_7841_cast_fp16 = add(x = var_7839_cast_fp16, y = var_7840_to_fp16)[name = tensor<string, []>("op_7841_cast_fp16")];
+            tensor<fp16, []> denom_33_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_33_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0_to_fp16, x = var_7841_cast_fp16)[name = tensor<string, []>("denom_33_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = denom_33_cast_fp16)[name = tensor<string, []>("out_33_cast_fp16")];
+            tensor<fp16, [768]> obj_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119619584)))];
+            tensor<fp16, [768]> obj_33_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_33_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119621184)))];
+            tensor<fp16, []> obj_33_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_33_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor<string, []>("obj_33_cast_fp16")];
+            tensor<int32, [2]> var_7856 = const()[name = tensor<string, []>("op_7856"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_7858 = const()[name = tensor<string, []>("op_7858"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_17_pad_type_0 = const()[name = tensor<string, []>("query_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_17_pad_0 = const()[name = tensor<string, []>("query_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119622784)))];
+            tensor<fp16, [768]> layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120802496)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_17_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_bias_to_fp16, dilations = var_7858, groups = var_7823, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = var_7856, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor<string, []>("query_17_cast_fp16")];
+            tensor<int32, [2]> var_7862 = const()[name = tensor<string, []>("op_7862"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_7864 = const()[name = tensor<string, []>("op_7864"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_17_pad_type_0 = const()[name = tensor<string, []>("key_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_17_pad_0 = const()[name = tensor<string, []>("key_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120804096)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_17_cast_fp16 = conv(dilations = var_7864, groups = var_7823, pad = key_17_pad_0, pad_type = key_17_pad_type_0, strides = var_7862, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor<string, []>("key_17_cast_fp16")];
+            tensor<int32, [2]> var_7869 = const()[name = tensor<string, []>("op_7869"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_7871 = const()[name = tensor<string, []>("op_7871"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_17_pad_type_0 = const()[name = tensor<string, []>("value_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_17_pad_0 = const()[name = tensor<string, []>("value_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(121983808)))];
+            tensor<fp16, [768]> layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(123163520)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_17_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_bias_to_fp16, dilations = var_7871, groups = var_7823, pad = value_17_pad_0, pad_type = value_17_pad_type_0, strides = var_7869, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor<string, []>("value_17_cast_fp16")];
+            tensor<int32, [4]> var_7878_begin_0 = const()[name = tensor<string, []>("op_7878_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7878_end_0 = const()[name = tensor<string, []>("op_7878_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7878_end_mask_0 = const()[name = tensor<string, []>("op_7878_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7878_cast_fp16 = slice_by_index(begin = var_7878_begin_0, end = var_7878_end_0, end_mask = var_7878_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7878_cast_fp16")];
+            tensor<int32, [4]> var_7882_begin_0 = const()[name = tensor<string, []>("op_7882_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_7882_end_0 = const()[name = tensor<string, []>("op_7882_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_7882_end_mask_0 = const()[name = tensor<string, []>("op_7882_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7882_cast_fp16 = slice_by_index(begin = var_7882_begin_0, end = var_7882_end_0, end_mask = var_7882_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7882_cast_fp16")];
+            tensor<int32, [4]> var_7886_begin_0 = const()[name = tensor<string, []>("op_7886_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_7886_end_0 = const()[name = tensor<string, []>("op_7886_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_7886_end_mask_0 = const()[name = tensor<string, []>("op_7886_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7886_cast_fp16 = slice_by_index(begin = var_7886_begin_0, end = var_7886_end_0, end_mask = var_7886_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7886_cast_fp16")];
+            tensor<int32, [4]> var_7890_begin_0 = const()[name = tensor<string, []>("op_7890_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_7890_end_0 = const()[name = tensor<string, []>("op_7890_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_7890_end_mask_0 = const()[name = tensor<string, []>("op_7890_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7890_cast_fp16 = slice_by_index(begin = var_7890_begin_0, end = var_7890_end_0, end_mask = var_7890_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7890_cast_fp16")];
+            tensor<int32, [4]> var_7894_begin_0 = const()[name = tensor<string, []>("op_7894_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_7894_end_0 = const()[name = tensor<string, []>("op_7894_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_7894_end_mask_0 = const()[name = tensor<string, []>("op_7894_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7894_cast_fp16 = slice_by_index(begin = var_7894_begin_0, end = var_7894_end_0, end_mask = var_7894_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7894_cast_fp16")];
+            tensor<int32, [4]> var_7898_begin_0 = const()[name = tensor<string, []>("op_7898_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_7898_end_0 = const()[name = tensor<string, []>("op_7898_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_7898_end_mask_0 = const()[name = tensor<string, []>("op_7898_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7898_cast_fp16 = slice_by_index(begin = var_7898_begin_0, end = var_7898_end_0, end_mask = var_7898_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7898_cast_fp16")];
+            tensor<int32, [4]> var_7902_begin_0 = const()[name = tensor<string, []>("op_7902_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_7902_end_0 = const()[name = tensor<string, []>("op_7902_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_7902_end_mask_0 = const()[name = tensor<string, []>("op_7902_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7902_cast_fp16 = slice_by_index(begin = var_7902_begin_0, end = var_7902_end_0, end_mask = var_7902_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7902_cast_fp16")];
+            tensor<int32, [4]> var_7906_begin_0 = const()[name = tensor<string, []>("op_7906_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_7906_end_0 = const()[name = tensor<string, []>("op_7906_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_7906_end_mask_0 = const()[name = tensor<string, []>("op_7906_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7906_cast_fp16 = slice_by_index(begin = var_7906_begin_0, end = var_7906_end_0, end_mask = var_7906_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7906_cast_fp16")];
+            tensor<int32, [4]> var_7910_begin_0 = const()[name = tensor<string, []>("op_7910_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_7910_end_0 = const()[name = tensor<string, []>("op_7910_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_7910_end_mask_0 = const()[name = tensor<string, []>("op_7910_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7910_cast_fp16 = slice_by_index(begin = var_7910_begin_0, end = var_7910_end_0, end_mask = var_7910_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7910_cast_fp16")];
+            tensor<int32, [4]> var_7914_begin_0 = const()[name = tensor<string, []>("op_7914_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_7914_end_0 = const()[name = tensor<string, []>("op_7914_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_7914_end_mask_0 = const()[name = tensor<string, []>("op_7914_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7914_cast_fp16 = slice_by_index(begin = var_7914_begin_0, end = var_7914_end_0, end_mask = var_7914_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7914_cast_fp16")];
+            tensor<int32, [4]> var_7918_begin_0 = const()[name = tensor<string, []>("op_7918_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_7918_end_0 = const()[name = tensor<string, []>("op_7918_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_7918_end_mask_0 = const()[name = tensor<string, []>("op_7918_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7918_cast_fp16 = slice_by_index(begin = var_7918_begin_0, end = var_7918_end_0, end_mask = var_7918_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7918_cast_fp16")];
+            tensor<int32, [4]> var_7922_begin_0 = const()[name = tensor<string, []>("op_7922_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_7922_end_0 = const()[name = tensor<string, []>("op_7922_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_7922_end_mask_0 = const()[name = tensor<string, []>("op_7922_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_7922_cast_fp16 = slice_by_index(begin = var_7922_begin_0, end = var_7922_end_0, end_mask = var_7922_end_mask_0, x = query_17_cast_fp16)[name = tensor<string, []>("op_7922_cast_fp16")];
+            tensor<int32, [4]> var_7931_begin_0 = const()[name = tensor<string, []>("op_7931_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7931_end_0 = const()[name = tensor<string, []>("op_7931_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7931_end_mask_0 = const()[name = tensor<string, []>("op_7931_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7931_cast_fp16 = slice_by_index(begin = var_7931_begin_0, end = var_7931_end_0, end_mask = var_7931_end_mask_0, x = var_7878_cast_fp16)[name = tensor<string, []>("op_7931_cast_fp16")];
+            tensor<int32, [4]> var_7938_begin_0 = const()[name = tensor<string, []>("op_7938_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7938_end_0 = const()[name = tensor<string, []>("op_7938_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7938_end_mask_0 = const()[name = tensor<string, []>("op_7938_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7938_cast_fp16 = slice_by_index(begin = var_7938_begin_0, end = var_7938_end_0, end_mask = var_7938_end_mask_0, x = var_7878_cast_fp16)[name = tensor<string, []>("op_7938_cast_fp16")];
+            tensor<int32, [4]> var_7945_begin_0 = const()[name = tensor<string, []>("op_7945_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7945_end_0 = const()[name = tensor<string, []>("op_7945_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7945_end_mask_0 = const()[name = tensor<string, []>("op_7945_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7945_cast_fp16 = slice_by_index(begin = var_7945_begin_0, end = var_7945_end_0, end_mask = var_7945_end_mask_0, x = var_7878_cast_fp16)[name = tensor<string, []>("op_7945_cast_fp16")];
+            tensor<int32, [4]> var_7952_begin_0 = const()[name = tensor<string, []>("op_7952_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7952_end_0 = const()[name = tensor<string, []>("op_7952_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7952_end_mask_0 = const()[name = tensor<string, []>("op_7952_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7952_cast_fp16 = slice_by_index(begin = var_7952_begin_0, end = var_7952_end_0, end_mask = var_7952_end_mask_0, x = var_7878_cast_fp16)[name = tensor<string, []>("op_7952_cast_fp16")];
+            tensor<int32, [4]> var_7959_begin_0 = const()[name = tensor<string, []>("op_7959_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7959_end_0 = const()[name = tensor<string, []>("op_7959_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7959_end_mask_0 = const()[name = tensor<string, []>("op_7959_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7959_cast_fp16 = slice_by_index(begin = var_7959_begin_0, end = var_7959_end_0, end_mask = var_7959_end_mask_0, x = var_7882_cast_fp16)[name = tensor<string, []>("op_7959_cast_fp16")];
+            tensor<int32, [4]> var_7966_begin_0 = const()[name = tensor<string, []>("op_7966_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7966_end_0 = const()[name = tensor<string, []>("op_7966_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7966_end_mask_0 = const()[name = tensor<string, []>("op_7966_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7966_cast_fp16 = slice_by_index(begin = var_7966_begin_0, end = var_7966_end_0, end_mask = var_7966_end_mask_0, x = var_7882_cast_fp16)[name = tensor<string, []>("op_7966_cast_fp16")];
+            tensor<int32, [4]> var_7973_begin_0 = const()[name = tensor<string, []>("op_7973_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_7973_end_0 = const()[name = tensor<string, []>("op_7973_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_7973_end_mask_0 = const()[name = tensor<string, []>("op_7973_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7973_cast_fp16 = slice_by_index(begin = var_7973_begin_0, end = var_7973_end_0, end_mask = var_7973_end_mask_0, x = var_7882_cast_fp16)[name = tensor<string, []>("op_7973_cast_fp16")];
+            tensor<int32, [4]> var_7980_begin_0 = const()[name = tensor<string, []>("op_7980_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_7980_end_0 = const()[name = tensor<string, []>("op_7980_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_7980_end_mask_0 = const()[name = tensor<string, []>("op_7980_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7980_cast_fp16 = slice_by_index(begin = var_7980_begin_0, end = var_7980_end_0, end_mask = var_7980_end_mask_0, x = var_7882_cast_fp16)[name = tensor<string, []>("op_7980_cast_fp16")];
+            tensor<int32, [4]> var_7987_begin_0 = const()[name = tensor<string, []>("op_7987_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_7987_end_0 = const()[name = tensor<string, []>("op_7987_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_7987_end_mask_0 = const()[name = tensor<string, []>("op_7987_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7987_cast_fp16 = slice_by_index(begin = var_7987_begin_0, end = var_7987_end_0, end_mask = var_7987_end_mask_0, x = var_7886_cast_fp16)[name = tensor<string, []>("op_7987_cast_fp16")];
+            tensor<int32, [4]> var_7994_begin_0 = const()[name = tensor<string, []>("op_7994_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_7994_end_0 = const()[name = tensor<string, []>("op_7994_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_7994_end_mask_0 = const()[name = tensor<string, []>("op_7994_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_7994_cast_fp16 = slice_by_index(begin = var_7994_begin_0, end = var_7994_end_0, end_mask = var_7994_end_mask_0, x = var_7886_cast_fp16)[name = tensor<string, []>("op_7994_cast_fp16")];
+            tensor<int32, [4]> var_8001_begin_0 = const()[name = tensor<string, []>("op_8001_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8001_end_0 = const()[name = tensor<string, []>("op_8001_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8001_end_mask_0 = const()[name = tensor<string, []>("op_8001_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8001_cast_fp16 = slice_by_index(begin = var_8001_begin_0, end = var_8001_end_0, end_mask = var_8001_end_mask_0, x = var_7886_cast_fp16)[name = tensor<string, []>("op_8001_cast_fp16")];
+            tensor<int32, [4]> var_8008_begin_0 = const()[name = tensor<string, []>("op_8008_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8008_end_0 = const()[name = tensor<string, []>("op_8008_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8008_end_mask_0 = const()[name = tensor<string, []>("op_8008_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8008_cast_fp16 = slice_by_index(begin = var_8008_begin_0, end = var_8008_end_0, end_mask = var_8008_end_mask_0, x = var_7886_cast_fp16)[name = tensor<string, []>("op_8008_cast_fp16")];
+            tensor<int32, [4]> var_8015_begin_0 = const()[name = tensor<string, []>("op_8015_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8015_end_0 = const()[name = tensor<string, []>("op_8015_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8015_end_mask_0 = const()[name = tensor<string, []>("op_8015_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8015_cast_fp16 = slice_by_index(begin = var_8015_begin_0, end = var_8015_end_0, end_mask = var_8015_end_mask_0, x = var_7890_cast_fp16)[name = tensor<string, []>("op_8015_cast_fp16")];
+            tensor<int32, [4]> var_8022_begin_0 = const()[name = tensor<string, []>("op_8022_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8022_end_0 = const()[name = tensor<string, []>("op_8022_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8022_end_mask_0 = const()[name = tensor<string, []>("op_8022_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8022_cast_fp16 = slice_by_index(begin = var_8022_begin_0, end = var_8022_end_0, end_mask = var_8022_end_mask_0, x = var_7890_cast_fp16)[name = tensor<string, []>("op_8022_cast_fp16")];
+            tensor<int32, [4]> var_8029_begin_0 = const()[name = tensor<string, []>("op_8029_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8029_end_0 = const()[name = tensor<string, []>("op_8029_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8029_end_mask_0 = const()[name = tensor<string, []>("op_8029_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8029_cast_fp16 = slice_by_index(begin = var_8029_begin_0, end = var_8029_end_0, end_mask = var_8029_end_mask_0, x = var_7890_cast_fp16)[name = tensor<string, []>("op_8029_cast_fp16")];
+            tensor<int32, [4]> var_8036_begin_0 = const()[name = tensor<string, []>("op_8036_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8036_end_0 = const()[name = tensor<string, []>("op_8036_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8036_end_mask_0 = const()[name = tensor<string, []>("op_8036_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8036_cast_fp16 = slice_by_index(begin = var_8036_begin_0, end = var_8036_end_0, end_mask = var_8036_end_mask_0, x = var_7890_cast_fp16)[name = tensor<string, []>("op_8036_cast_fp16")];
+            tensor<int32, [4]> var_8043_begin_0 = const()[name = tensor<string, []>("op_8043_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8043_end_0 = const()[name = tensor<string, []>("op_8043_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8043_end_mask_0 = const()[name = tensor<string, []>("op_8043_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8043_cast_fp16 = slice_by_index(begin = var_8043_begin_0, end = var_8043_end_0, end_mask = var_8043_end_mask_0, x = var_7894_cast_fp16)[name = tensor<string, []>("op_8043_cast_fp16")];
+            tensor<int32, [4]> var_8050_begin_0 = const()[name = tensor<string, []>("op_8050_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8050_end_0 = const()[name = tensor<string, []>("op_8050_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8050_end_mask_0 = const()[name = tensor<string, []>("op_8050_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8050_cast_fp16 = slice_by_index(begin = var_8050_begin_0, end = var_8050_end_0, end_mask = var_8050_end_mask_0, x = var_7894_cast_fp16)[name = tensor<string, []>("op_8050_cast_fp16")];
+            tensor<int32, [4]> var_8057_begin_0 = const()[name = tensor<string, []>("op_8057_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8057_end_0 = const()[name = tensor<string, []>("op_8057_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8057_end_mask_0 = const()[name = tensor<string, []>("op_8057_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8057_cast_fp16 = slice_by_index(begin = var_8057_begin_0, end = var_8057_end_0, end_mask = var_8057_end_mask_0, x = var_7894_cast_fp16)[name = tensor<string, []>("op_8057_cast_fp16")];
+            tensor<int32, [4]> var_8064_begin_0 = const()[name = tensor<string, []>("op_8064_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8064_end_0 = const()[name = tensor<string, []>("op_8064_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8064_end_mask_0 = const()[name = tensor<string, []>("op_8064_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8064_cast_fp16 = slice_by_index(begin = var_8064_begin_0, end = var_8064_end_0, end_mask = var_8064_end_mask_0, x = var_7894_cast_fp16)[name = tensor<string, []>("op_8064_cast_fp16")];
+            tensor<int32, [4]> var_8071_begin_0 = const()[name = tensor<string, []>("op_8071_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8071_end_0 = const()[name = tensor<string, []>("op_8071_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8071_end_mask_0 = const()[name = tensor<string, []>("op_8071_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8071_cast_fp16 = slice_by_index(begin = var_8071_begin_0, end = var_8071_end_0, end_mask = var_8071_end_mask_0, x = var_7898_cast_fp16)[name = tensor<string, []>("op_8071_cast_fp16")];
+            tensor<int32, [4]> var_8078_begin_0 = const()[name = tensor<string, []>("op_8078_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8078_end_0 = const()[name = tensor<string, []>("op_8078_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8078_end_mask_0 = const()[name = tensor<string, []>("op_8078_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8078_cast_fp16 = slice_by_index(begin = var_8078_begin_0, end = var_8078_end_0, end_mask = var_8078_end_mask_0, x = var_7898_cast_fp16)[name = tensor<string, []>("op_8078_cast_fp16")];
+            tensor<int32, [4]> var_8085_begin_0 = const()[name = tensor<string, []>("op_8085_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8085_end_0 = const()[name = tensor<string, []>("op_8085_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8085_end_mask_0 = const()[name = tensor<string, []>("op_8085_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8085_cast_fp16 = slice_by_index(begin = var_8085_begin_0, end = var_8085_end_0, end_mask = var_8085_end_mask_0, x = var_7898_cast_fp16)[name = tensor<string, []>("op_8085_cast_fp16")];
+            tensor<int32, [4]> var_8092_begin_0 = const()[name = tensor<string, []>("op_8092_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8092_end_0 = const()[name = tensor<string, []>("op_8092_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8092_end_mask_0 = const()[name = tensor<string, []>("op_8092_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8092_cast_fp16 = slice_by_index(begin = var_8092_begin_0, end = var_8092_end_0, end_mask = var_8092_end_mask_0, x = var_7898_cast_fp16)[name = tensor<string, []>("op_8092_cast_fp16")];
+            tensor<int32, [4]> var_8099_begin_0 = const()[name = tensor<string, []>("op_8099_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8099_end_0 = const()[name = tensor<string, []>("op_8099_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8099_end_mask_0 = const()[name = tensor<string, []>("op_8099_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8099_cast_fp16 = slice_by_index(begin = var_8099_begin_0, end = var_8099_end_0, end_mask = var_8099_end_mask_0, x = var_7902_cast_fp16)[name = tensor<string, []>("op_8099_cast_fp16")];
+            tensor<int32, [4]> var_8106_begin_0 = const()[name = tensor<string, []>("op_8106_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8106_end_0 = const()[name = tensor<string, []>("op_8106_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8106_end_mask_0 = const()[name = tensor<string, []>("op_8106_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8106_cast_fp16 = slice_by_index(begin = var_8106_begin_0, end = var_8106_end_0, end_mask = var_8106_end_mask_0, x = var_7902_cast_fp16)[name = tensor<string, []>("op_8106_cast_fp16")];
+            tensor<int32, [4]> var_8113_begin_0 = const()[name = tensor<string, []>("op_8113_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8113_end_0 = const()[name = tensor<string, []>("op_8113_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8113_end_mask_0 = const()[name = tensor<string, []>("op_8113_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8113_cast_fp16 = slice_by_index(begin = var_8113_begin_0, end = var_8113_end_0, end_mask = var_8113_end_mask_0, x = var_7902_cast_fp16)[name = tensor<string, []>("op_8113_cast_fp16")];
+            tensor<int32, [4]> var_8120_begin_0 = const()[name = tensor<string, []>("op_8120_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8120_end_0 = const()[name = tensor<string, []>("op_8120_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8120_end_mask_0 = const()[name = tensor<string, []>("op_8120_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8120_cast_fp16 = slice_by_index(begin = var_8120_begin_0, end = var_8120_end_0, end_mask = var_8120_end_mask_0, x = var_7902_cast_fp16)[name = tensor<string, []>("op_8120_cast_fp16")];
+            tensor<int32, [4]> var_8127_begin_0 = const()[name = tensor<string, []>("op_8127_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8127_end_0 = const()[name = tensor<string, []>("op_8127_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8127_end_mask_0 = const()[name = tensor<string, []>("op_8127_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8127_cast_fp16 = slice_by_index(begin = var_8127_begin_0, end = var_8127_end_0, end_mask = var_8127_end_mask_0, x = var_7906_cast_fp16)[name = tensor<string, []>("op_8127_cast_fp16")];
+            tensor<int32, [4]> var_8134_begin_0 = const()[name = tensor<string, []>("op_8134_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8134_end_0 = const()[name = tensor<string, []>("op_8134_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8134_end_mask_0 = const()[name = tensor<string, []>("op_8134_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8134_cast_fp16 = slice_by_index(begin = var_8134_begin_0, end = var_8134_end_0, end_mask = var_8134_end_mask_0, x = var_7906_cast_fp16)[name = tensor<string, []>("op_8134_cast_fp16")];
+            tensor<int32, [4]> var_8141_begin_0 = const()[name = tensor<string, []>("op_8141_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8141_end_0 = const()[name = tensor<string, []>("op_8141_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8141_end_mask_0 = const()[name = tensor<string, []>("op_8141_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8141_cast_fp16 = slice_by_index(begin = var_8141_begin_0, end = var_8141_end_0, end_mask = var_8141_end_mask_0, x = var_7906_cast_fp16)[name = tensor<string, []>("op_8141_cast_fp16")];
+            tensor<int32, [4]> var_8148_begin_0 = const()[name = tensor<string, []>("op_8148_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8148_end_0 = const()[name = tensor<string, []>("op_8148_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8148_end_mask_0 = const()[name = tensor<string, []>("op_8148_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8148_cast_fp16 = slice_by_index(begin = var_8148_begin_0, end = var_8148_end_0, end_mask = var_8148_end_mask_0, x = var_7906_cast_fp16)[name = tensor<string, []>("op_8148_cast_fp16")];
+            tensor<int32, [4]> var_8155_begin_0 = const()[name = tensor<string, []>("op_8155_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8155_end_0 = const()[name = tensor<string, []>("op_8155_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8155_end_mask_0 = const()[name = tensor<string, []>("op_8155_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8155_cast_fp16 = slice_by_index(begin = var_8155_begin_0, end = var_8155_end_0, end_mask = var_8155_end_mask_0, x = var_7910_cast_fp16)[name = tensor<string, []>("op_8155_cast_fp16")];
+            tensor<int32, [4]> var_8162_begin_0 = const()[name = tensor<string, []>("op_8162_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8162_end_0 = const()[name = tensor<string, []>("op_8162_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8162_end_mask_0 = const()[name = tensor<string, []>("op_8162_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8162_cast_fp16 = slice_by_index(begin = var_8162_begin_0, end = var_8162_end_0, end_mask = var_8162_end_mask_0, x = var_7910_cast_fp16)[name = tensor<string, []>("op_8162_cast_fp16")];
+            tensor<int32, [4]> var_8169_begin_0 = const()[name = tensor<string, []>("op_8169_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8169_end_0 = const()[name = tensor<string, []>("op_8169_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8169_end_mask_0 = const()[name = tensor<string, []>("op_8169_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8169_cast_fp16 = slice_by_index(begin = var_8169_begin_0, end = var_8169_end_0, end_mask = var_8169_end_mask_0, x = var_7910_cast_fp16)[name = tensor<string, []>("op_8169_cast_fp16")];
+            tensor<int32, [4]> var_8176_begin_0 = const()[name = tensor<string, []>("op_8176_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8176_end_0 = const()[name = tensor<string, []>("op_8176_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8176_end_mask_0 = const()[name = tensor<string, []>("op_8176_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8176_cast_fp16 = slice_by_index(begin = var_8176_begin_0, end = var_8176_end_0, end_mask = var_8176_end_mask_0, x = var_7910_cast_fp16)[name = tensor<string, []>("op_8176_cast_fp16")];
+            tensor<int32, [4]> var_8183_begin_0 = const()[name = tensor<string, []>("op_8183_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8183_end_0 = const()[name = tensor<string, []>("op_8183_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8183_end_mask_0 = const()[name = tensor<string, []>("op_8183_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8183_cast_fp16 = slice_by_index(begin = var_8183_begin_0, end = var_8183_end_0, end_mask = var_8183_end_mask_0, x = var_7914_cast_fp16)[name = tensor<string, []>("op_8183_cast_fp16")];
+            tensor<int32, [4]> var_8190_begin_0 = const()[name = tensor<string, []>("op_8190_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8190_end_0 = const()[name = tensor<string, []>("op_8190_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8190_end_mask_0 = const()[name = tensor<string, []>("op_8190_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8190_cast_fp16 = slice_by_index(begin = var_8190_begin_0, end = var_8190_end_0, end_mask = var_8190_end_mask_0, x = var_7914_cast_fp16)[name = tensor<string, []>("op_8190_cast_fp16")];
+            tensor<int32, [4]> var_8197_begin_0 = const()[name = tensor<string, []>("op_8197_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8197_end_0 = const()[name = tensor<string, []>("op_8197_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8197_end_mask_0 = const()[name = tensor<string, []>("op_8197_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8197_cast_fp16 = slice_by_index(begin = var_8197_begin_0, end = var_8197_end_0, end_mask = var_8197_end_mask_0, x = var_7914_cast_fp16)[name = tensor<string, []>("op_8197_cast_fp16")];
+            tensor<int32, [4]> var_8204_begin_0 = const()[name = tensor<string, []>("op_8204_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8204_end_0 = const()[name = tensor<string, []>("op_8204_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8204_end_mask_0 = const()[name = tensor<string, []>("op_8204_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8204_cast_fp16 = slice_by_index(begin = var_8204_begin_0, end = var_8204_end_0, end_mask = var_8204_end_mask_0, x = var_7914_cast_fp16)[name = tensor<string, []>("op_8204_cast_fp16")];
+            tensor<int32, [4]> var_8211_begin_0 = const()[name = tensor<string, []>("op_8211_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8211_end_0 = const()[name = tensor<string, []>("op_8211_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8211_end_mask_0 = const()[name = tensor<string, []>("op_8211_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8211_cast_fp16 = slice_by_index(begin = var_8211_begin_0, end = var_8211_end_0, end_mask = var_8211_end_mask_0, x = var_7918_cast_fp16)[name = tensor<string, []>("op_8211_cast_fp16")];
+            tensor<int32, [4]> var_8218_begin_0 = const()[name = tensor<string, []>("op_8218_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8218_end_0 = const()[name = tensor<string, []>("op_8218_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8218_end_mask_0 = const()[name = tensor<string, []>("op_8218_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8218_cast_fp16 = slice_by_index(begin = var_8218_begin_0, end = var_8218_end_0, end_mask = var_8218_end_mask_0, x = var_7918_cast_fp16)[name = tensor<string, []>("op_8218_cast_fp16")];
+            tensor<int32, [4]> var_8225_begin_0 = const()[name = tensor<string, []>("op_8225_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8225_end_0 = const()[name = tensor<string, []>("op_8225_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8225_end_mask_0 = const()[name = tensor<string, []>("op_8225_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8225_cast_fp16 = slice_by_index(begin = var_8225_begin_0, end = var_8225_end_0, end_mask = var_8225_end_mask_0, x = var_7918_cast_fp16)[name = tensor<string, []>("op_8225_cast_fp16")];
+            tensor<int32, [4]> var_8232_begin_0 = const()[name = tensor<string, []>("op_8232_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8232_end_0 = const()[name = tensor<string, []>("op_8232_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8232_end_mask_0 = const()[name = tensor<string, []>("op_8232_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8232_cast_fp16 = slice_by_index(begin = var_8232_begin_0, end = var_8232_end_0, end_mask = var_8232_end_mask_0, x = var_7918_cast_fp16)[name = tensor<string, []>("op_8232_cast_fp16")];
+            tensor<int32, [4]> var_8239_begin_0 = const()[name = tensor<string, []>("op_8239_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8239_end_0 = const()[name = tensor<string, []>("op_8239_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8239_end_mask_0 = const()[name = tensor<string, []>("op_8239_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8239_cast_fp16 = slice_by_index(begin = var_8239_begin_0, end = var_8239_end_0, end_mask = var_8239_end_mask_0, x = var_7922_cast_fp16)[name = tensor<string, []>("op_8239_cast_fp16")];
+            tensor<int32, [4]> var_8246_begin_0 = const()[name = tensor<string, []>("op_8246_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8246_end_0 = const()[name = tensor<string, []>("op_8246_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8246_end_mask_0 = const()[name = tensor<string, []>("op_8246_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8246_cast_fp16 = slice_by_index(begin = var_8246_begin_0, end = var_8246_end_0, end_mask = var_8246_end_mask_0, x = var_7922_cast_fp16)[name = tensor<string, []>("op_8246_cast_fp16")];
+            tensor<int32, [4]> var_8253_begin_0 = const()[name = tensor<string, []>("op_8253_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8253_end_0 = const()[name = tensor<string, []>("op_8253_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8253_end_mask_0 = const()[name = tensor<string, []>("op_8253_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8253_cast_fp16 = slice_by_index(begin = var_8253_begin_0, end = var_8253_end_0, end_mask = var_8253_end_mask_0, x = var_7922_cast_fp16)[name = tensor<string, []>("op_8253_cast_fp16")];
+            tensor<int32, [4]> var_8260_begin_0 = const()[name = tensor<string, []>("op_8260_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8260_end_0 = const()[name = tensor<string, []>("op_8260_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8260_end_mask_0 = const()[name = tensor<string, []>("op_8260_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8260_cast_fp16 = slice_by_index(begin = var_8260_begin_0, end = var_8260_end_0, end_mask = var_8260_end_mask_0, x = var_7922_cast_fp16)[name = tensor<string, []>("op_8260_cast_fp16")];
+            tensor<int32, [4]> k_17_perm_0 = const()[name = tensor<string, []>("k_17_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_8265_begin_0 = const()[name = tensor<string, []>("op_8265_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8265_end_0 = const()[name = tensor<string, []>("op_8265_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_8265_end_mask_0 = const()[name = tensor<string, []>("op_8265_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_3 = transpose(perm = k_17_perm_0, x = key_17_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_8265_cast_fp16 = slice_by_index(begin = var_8265_begin_0, end = var_8265_end_0, end_mask = var_8265_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8265_cast_fp16")];
+            tensor<int32, [4]> var_8269_begin_0 = const()[name = tensor<string, []>("op_8269_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_8269_end_0 = const()[name = tensor<string, []>("op_8269_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_8269_end_mask_0 = const()[name = tensor<string, []>("op_8269_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8269_cast_fp16 = slice_by_index(begin = var_8269_begin_0, end = var_8269_end_0, end_mask = var_8269_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8269_cast_fp16")];
+            tensor<int32, [4]> var_8273_begin_0 = const()[name = tensor<string, []>("op_8273_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_8273_end_0 = const()[name = tensor<string, []>("op_8273_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_8273_end_mask_0 = const()[name = tensor<string, []>("op_8273_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8273_cast_fp16 = slice_by_index(begin = var_8273_begin_0, end = var_8273_end_0, end_mask = var_8273_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8273_cast_fp16")];
+            tensor<int32, [4]> var_8277_begin_0 = const()[name = tensor<string, []>("op_8277_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_8277_end_0 = const()[name = tensor<string, []>("op_8277_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_8277_end_mask_0 = const()[name = tensor<string, []>("op_8277_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8277_cast_fp16 = slice_by_index(begin = var_8277_begin_0, end = var_8277_end_0, end_mask = var_8277_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8277_cast_fp16")];
+            tensor<int32, [4]> var_8281_begin_0 = const()[name = tensor<string, []>("op_8281_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_8281_end_0 = const()[name = tensor<string, []>("op_8281_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_8281_end_mask_0 = const()[name = tensor<string, []>("op_8281_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8281_cast_fp16 = slice_by_index(begin = var_8281_begin_0, end = var_8281_end_0, end_mask = var_8281_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8281_cast_fp16")];
+            tensor<int32, [4]> var_8285_begin_0 = const()[name = tensor<string, []>("op_8285_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_8285_end_0 = const()[name = tensor<string, []>("op_8285_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_8285_end_mask_0 = const()[name = tensor<string, []>("op_8285_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8285_cast_fp16 = slice_by_index(begin = var_8285_begin_0, end = var_8285_end_0, end_mask = var_8285_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8285_cast_fp16")];
+            tensor<int32, [4]> var_8289_begin_0 = const()[name = tensor<string, []>("op_8289_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_8289_end_0 = const()[name = tensor<string, []>("op_8289_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_8289_end_mask_0 = const()[name = tensor<string, []>("op_8289_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8289_cast_fp16 = slice_by_index(begin = var_8289_begin_0, end = var_8289_end_0, end_mask = var_8289_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8289_cast_fp16")];
+            tensor<int32, [4]> var_8293_begin_0 = const()[name = tensor<string, []>("op_8293_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_8293_end_0 = const()[name = tensor<string, []>("op_8293_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_8293_end_mask_0 = const()[name = tensor<string, []>("op_8293_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8293_cast_fp16 = slice_by_index(begin = var_8293_begin_0, end = var_8293_end_0, end_mask = var_8293_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8293_cast_fp16")];
+            tensor<int32, [4]> var_8297_begin_0 = const()[name = tensor<string, []>("op_8297_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_8297_end_0 = const()[name = tensor<string, []>("op_8297_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_8297_end_mask_0 = const()[name = tensor<string, []>("op_8297_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8297_cast_fp16 = slice_by_index(begin = var_8297_begin_0, end = var_8297_end_0, end_mask = var_8297_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8297_cast_fp16")];
+            tensor<int32, [4]> var_8301_begin_0 = const()[name = tensor<string, []>("op_8301_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_8301_end_0 = const()[name = tensor<string, []>("op_8301_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_8301_end_mask_0 = const()[name = tensor<string, []>("op_8301_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8301_cast_fp16 = slice_by_index(begin = var_8301_begin_0, end = var_8301_end_0, end_mask = var_8301_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8301_cast_fp16")];
+            tensor<int32, [4]> var_8305_begin_0 = const()[name = tensor<string, []>("op_8305_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_8305_end_0 = const()[name = tensor<string, []>("op_8305_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_8305_end_mask_0 = const()[name = tensor<string, []>("op_8305_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8305_cast_fp16 = slice_by_index(begin = var_8305_begin_0, end = var_8305_end_0, end_mask = var_8305_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8305_cast_fp16")];
+            tensor<int32, [4]> var_8309_begin_0 = const()[name = tensor<string, []>("op_8309_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_8309_end_0 = const()[name = tensor<string, []>("op_8309_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_8309_end_mask_0 = const()[name = tensor<string, []>("op_8309_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_8309_cast_fp16 = slice_by_index(begin = var_8309_begin_0, end = var_8309_end_0, end_mask = var_8309_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_8309_cast_fp16")];
+            tensor<int32, [4]> var_8311_begin_0 = const()[name = tensor<string, []>("op_8311_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8311_end_0 = const()[name = tensor<string, []>("op_8311_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8311_end_mask_0 = const()[name = tensor<string, []>("op_8311_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8311_cast_fp16 = slice_by_index(begin = var_8311_begin_0, end = var_8311_end_0, end_mask = var_8311_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8311_cast_fp16")];
+            tensor<int32, [4]> var_8315_begin_0 = const()[name = tensor<string, []>("op_8315_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_8315_end_0 = const()[name = tensor<string, []>("op_8315_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_8315_end_mask_0 = const()[name = tensor<string, []>("op_8315_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8315_cast_fp16 = slice_by_index(begin = var_8315_begin_0, end = var_8315_end_0, end_mask = var_8315_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8315_cast_fp16")];
+            tensor<int32, [4]> var_8319_begin_0 = const()[name = tensor<string, []>("op_8319_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_8319_end_0 = const()[name = tensor<string, []>("op_8319_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_8319_end_mask_0 = const()[name = tensor<string, []>("op_8319_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8319_cast_fp16 = slice_by_index(begin = var_8319_begin_0, end = var_8319_end_0, end_mask = var_8319_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8319_cast_fp16")];
+            tensor<int32, [4]> var_8323_begin_0 = const()[name = tensor<string, []>("op_8323_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_8323_end_0 = const()[name = tensor<string, []>("op_8323_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_8323_end_mask_0 = const()[name = tensor<string, []>("op_8323_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8323_cast_fp16 = slice_by_index(begin = var_8323_begin_0, end = var_8323_end_0, end_mask = var_8323_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8323_cast_fp16")];
+            tensor<int32, [4]> var_8327_begin_0 = const()[name = tensor<string, []>("op_8327_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_8327_end_0 = const()[name = tensor<string, []>("op_8327_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_8327_end_mask_0 = const()[name = tensor<string, []>("op_8327_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8327_cast_fp16 = slice_by_index(begin = var_8327_begin_0, end = var_8327_end_0, end_mask = var_8327_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8327_cast_fp16")];
+            tensor<int32, [4]> var_8331_begin_0 = const()[name = tensor<string, []>("op_8331_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_8331_end_0 = const()[name = tensor<string, []>("op_8331_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_8331_end_mask_0 = const()[name = tensor<string, []>("op_8331_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8331_cast_fp16 = slice_by_index(begin = var_8331_begin_0, end = var_8331_end_0, end_mask = var_8331_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8331_cast_fp16")];
+            tensor<int32, [4]> var_8335_begin_0 = const()[name = tensor<string, []>("op_8335_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_8335_end_0 = const()[name = tensor<string, []>("op_8335_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_8335_end_mask_0 = const()[name = tensor<string, []>("op_8335_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8335_cast_fp16 = slice_by_index(begin = var_8335_begin_0, end = var_8335_end_0, end_mask = var_8335_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8335_cast_fp16")];
+            tensor<int32, [4]> var_8339_begin_0 = const()[name = tensor<string, []>("op_8339_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_8339_end_0 = const()[name = tensor<string, []>("op_8339_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_8339_end_mask_0 = const()[name = tensor<string, []>("op_8339_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8339_cast_fp16 = slice_by_index(begin = var_8339_begin_0, end = var_8339_end_0, end_mask = var_8339_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8339_cast_fp16")];
+            tensor<int32, [4]> var_8343_begin_0 = const()[name = tensor<string, []>("op_8343_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_8343_end_0 = const()[name = tensor<string, []>("op_8343_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_8343_end_mask_0 = const()[name = tensor<string, []>("op_8343_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8343_cast_fp16 = slice_by_index(begin = var_8343_begin_0, end = var_8343_end_0, end_mask = var_8343_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8343_cast_fp16")];
+            tensor<int32, [4]> var_8347_begin_0 = const()[name = tensor<string, []>("op_8347_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_8347_end_0 = const()[name = tensor<string, []>("op_8347_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_8347_end_mask_0 = const()[name = tensor<string, []>("op_8347_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8347_cast_fp16 = slice_by_index(begin = var_8347_begin_0, end = var_8347_end_0, end_mask = var_8347_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8347_cast_fp16")];
+            tensor<int32, [4]> var_8351_begin_0 = const()[name = tensor<string, []>("op_8351_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_8351_end_0 = const()[name = tensor<string, []>("op_8351_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_8351_end_mask_0 = const()[name = tensor<string, []>("op_8351_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8351_cast_fp16 = slice_by_index(begin = var_8351_begin_0, end = var_8351_end_0, end_mask = var_8351_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8351_cast_fp16")];
+            tensor<int32, [4]> var_8355_begin_0 = const()[name = tensor<string, []>("op_8355_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_8355_end_0 = const()[name = tensor<string, []>("op_8355_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_8355_end_mask_0 = const()[name = tensor<string, []>("op_8355_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8355_cast_fp16 = slice_by_index(begin = var_8355_begin_0, end = var_8355_end_0, end_mask = var_8355_end_mask_0, x = value_17_cast_fp16)[name = tensor<string, []>("op_8355_cast_fp16")];
+            tensor<string, []> var_8359_equation_0 = const()[name = tensor<string, []>("op_8359_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8359_cast_fp16 = einsum(equation = var_8359_equation_0, values = (var_8265_cast_fp16, var_7931_cast_fp16))[name = tensor<string, []>("op_8359_cast_fp16")];
+            tensor<fp16, []> var_8360_to_fp16 = const()[name = tensor<string, []>("op_8360_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_769_cast_fp16 = mul(x = var_8359_cast_fp16, y = var_8360_to_fp16)[name = tensor<string, []>("aw_chunk_769_cast_fp16")];
+            tensor<string, []> var_8363_equation_0 = const()[name = tensor<string, []>("op_8363_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8363_cast_fp16 = einsum(equation = var_8363_equation_0, values = (var_8265_cast_fp16, var_7938_cast_fp16))[name = tensor<string, []>("op_8363_cast_fp16")];
+            tensor<fp16, []> var_8364_to_fp16 = const()[name = tensor<string, []>("op_8364_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_771_cast_fp16 = mul(x = var_8363_cast_fp16, y = var_8364_to_fp16)[name = tensor<string, []>("aw_chunk_771_cast_fp16")];
+            tensor<string, []> var_8367_equation_0 = const()[name = tensor<string, []>("op_8367_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8367_cast_fp16 = einsum(equation = var_8367_equation_0, values = (var_8265_cast_fp16, var_7945_cast_fp16))[name = tensor<string, []>("op_8367_cast_fp16")];
+            tensor<fp16, []> var_8368_to_fp16 = const()[name = tensor<string, []>("op_8368_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_773_cast_fp16 = mul(x = var_8367_cast_fp16, y = var_8368_to_fp16)[name = tensor<string, []>("aw_chunk_773_cast_fp16")];
+            tensor<string, []> var_8371_equation_0 = const()[name = tensor<string, []>("op_8371_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8371_cast_fp16 = einsum(equation = var_8371_equation_0, values = (var_8265_cast_fp16, var_7952_cast_fp16))[name = tensor<string, []>("op_8371_cast_fp16")];
+            tensor<fp16, []> var_8372_to_fp16 = const()[name = tensor<string, []>("op_8372_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_775_cast_fp16 = mul(x = var_8371_cast_fp16, y = var_8372_to_fp16)[name = tensor<string, []>("aw_chunk_775_cast_fp16")];
+            tensor<string, []> var_8375_equation_0 = const()[name = tensor<string, []>("op_8375_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8375_cast_fp16 = einsum(equation = var_8375_equation_0, values = (var_8269_cast_fp16, var_7959_cast_fp16))[name = tensor<string, []>("op_8375_cast_fp16")];
+            tensor<fp16, []> var_8376_to_fp16 = const()[name = tensor<string, []>("op_8376_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_777_cast_fp16 = mul(x = var_8375_cast_fp16, y = var_8376_to_fp16)[name = tensor<string, []>("aw_chunk_777_cast_fp16")];
+            tensor<string, []> var_8379_equation_0 = const()[name = tensor<string, []>("op_8379_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8379_cast_fp16 = einsum(equation = var_8379_equation_0, values = (var_8269_cast_fp16, var_7966_cast_fp16))[name = tensor<string, []>("op_8379_cast_fp16")];
+            tensor<fp16, []> var_8380_to_fp16 = const()[name = tensor<string, []>("op_8380_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_779_cast_fp16 = mul(x = var_8379_cast_fp16, y = var_8380_to_fp16)[name = tensor<string, []>("aw_chunk_779_cast_fp16")];
+            tensor<string, []> var_8383_equation_0 = const()[name = tensor<string, []>("op_8383_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8383_cast_fp16 = einsum(equation = var_8383_equation_0, values = (var_8269_cast_fp16, var_7973_cast_fp16))[name = tensor<string, []>("op_8383_cast_fp16")];
+            tensor<fp16, []> var_8384_to_fp16 = const()[name = tensor<string, []>("op_8384_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_781_cast_fp16 = mul(x = var_8383_cast_fp16, y = var_8384_to_fp16)[name = tensor<string, []>("aw_chunk_781_cast_fp16")];
+            tensor<string, []> var_8387_equation_0 = const()[name = tensor<string, []>("op_8387_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8387_cast_fp16 = einsum(equation = var_8387_equation_0, values = (var_8269_cast_fp16, var_7980_cast_fp16))[name = tensor<string, []>("op_8387_cast_fp16")];
+            tensor<fp16, []> var_8388_to_fp16 = const()[name = tensor<string, []>("op_8388_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_783_cast_fp16 = mul(x = var_8387_cast_fp16, y = var_8388_to_fp16)[name = tensor<string, []>("aw_chunk_783_cast_fp16")];
+            tensor<string, []> var_8391_equation_0 = const()[name = tensor<string, []>("op_8391_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8391_cast_fp16 = einsum(equation = var_8391_equation_0, values = (var_8273_cast_fp16, var_7987_cast_fp16))[name = tensor<string, []>("op_8391_cast_fp16")];
+            tensor<fp16, []> var_8392_to_fp16 = const()[name = tensor<string, []>("op_8392_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_785_cast_fp16 = mul(x = var_8391_cast_fp16, y = var_8392_to_fp16)[name = tensor<string, []>("aw_chunk_785_cast_fp16")];
+            tensor<string, []> var_8395_equation_0 = const()[name = tensor<string, []>("op_8395_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8395_cast_fp16 = einsum(equation = var_8395_equation_0, values = (var_8273_cast_fp16, var_7994_cast_fp16))[name = tensor<string, []>("op_8395_cast_fp16")];
+            tensor<fp16, []> var_8396_to_fp16 = const()[name = tensor<string, []>("op_8396_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_787_cast_fp16 = mul(x = var_8395_cast_fp16, y = var_8396_to_fp16)[name = tensor<string, []>("aw_chunk_787_cast_fp16")];
+            tensor<string, []> var_8399_equation_0 = const()[name = tensor<string, []>("op_8399_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8399_cast_fp16 = einsum(equation = var_8399_equation_0, values = (var_8273_cast_fp16, var_8001_cast_fp16))[name = tensor<string, []>("op_8399_cast_fp16")];
+            tensor<fp16, []> var_8400_to_fp16 = const()[name = tensor<string, []>("op_8400_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_789_cast_fp16 = mul(x = var_8399_cast_fp16, y = var_8400_to_fp16)[name = tensor<string, []>("aw_chunk_789_cast_fp16")];
+            tensor<string, []> var_8403_equation_0 = const()[name = tensor<string, []>("op_8403_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8403_cast_fp16 = einsum(equation = var_8403_equation_0, values = (var_8273_cast_fp16, var_8008_cast_fp16))[name = tensor<string, []>("op_8403_cast_fp16")];
+            tensor<fp16, []> var_8404_to_fp16 = const()[name = tensor<string, []>("op_8404_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_791_cast_fp16 = mul(x = var_8403_cast_fp16, y = var_8404_to_fp16)[name = tensor<string, []>("aw_chunk_791_cast_fp16")];
+            tensor<string, []> var_8407_equation_0 = const()[name = tensor<string, []>("op_8407_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8407_cast_fp16 = einsum(equation = var_8407_equation_0, values = (var_8277_cast_fp16, var_8015_cast_fp16))[name = tensor<string, []>("op_8407_cast_fp16")];
+            tensor<fp16, []> var_8408_to_fp16 = const()[name = tensor<string, []>("op_8408_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_793_cast_fp16 = mul(x = var_8407_cast_fp16, y = var_8408_to_fp16)[name = tensor<string, []>("aw_chunk_793_cast_fp16")];
+            tensor<string, []> var_8411_equation_0 = const()[name = tensor<string, []>("op_8411_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8411_cast_fp16 = einsum(equation = var_8411_equation_0, values = (var_8277_cast_fp16, var_8022_cast_fp16))[name = tensor<string, []>("op_8411_cast_fp16")];
+            tensor<fp16, []> var_8412_to_fp16 = const()[name = tensor<string, []>("op_8412_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_795_cast_fp16 = mul(x = var_8411_cast_fp16, y = var_8412_to_fp16)[name = tensor<string, []>("aw_chunk_795_cast_fp16")];
+            tensor<string, []> var_8415_equation_0 = const()[name = tensor<string, []>("op_8415_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8415_cast_fp16 = einsum(equation = var_8415_equation_0, values = (var_8277_cast_fp16, var_8029_cast_fp16))[name = tensor<string, []>("op_8415_cast_fp16")];
+            tensor<fp16, []> var_8416_to_fp16 = const()[name = tensor<string, []>("op_8416_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_797_cast_fp16 = mul(x = var_8415_cast_fp16, y = var_8416_to_fp16)[name = tensor<string, []>("aw_chunk_797_cast_fp16")];
+            tensor<string, []> var_8419_equation_0 = const()[name = tensor<string, []>("op_8419_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8419_cast_fp16 = einsum(equation = var_8419_equation_0, values = (var_8277_cast_fp16, var_8036_cast_fp16))[name = tensor<string, []>("op_8419_cast_fp16")];
+            tensor<fp16, []> var_8420_to_fp16 = const()[name = tensor<string, []>("op_8420_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_799_cast_fp16 = mul(x = var_8419_cast_fp16, y = var_8420_to_fp16)[name = tensor<string, []>("aw_chunk_799_cast_fp16")];
+            tensor<string, []> var_8423_equation_0 = const()[name = tensor<string, []>("op_8423_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8423_cast_fp16 = einsum(equation = var_8423_equation_0, values = (var_8281_cast_fp16, var_8043_cast_fp16))[name = tensor<string, []>("op_8423_cast_fp16")];
+            tensor<fp16, []> var_8424_to_fp16 = const()[name = tensor<string, []>("op_8424_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_801_cast_fp16 = mul(x = var_8423_cast_fp16, y = var_8424_to_fp16)[name = tensor<string, []>("aw_chunk_801_cast_fp16")];
+            tensor<string, []> var_8427_equation_0 = const()[name = tensor<string, []>("op_8427_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8427_cast_fp16 = einsum(equation = var_8427_equation_0, values = (var_8281_cast_fp16, var_8050_cast_fp16))[name = tensor<string, []>("op_8427_cast_fp16")];
+            tensor<fp16, []> var_8428_to_fp16 = const()[name = tensor<string, []>("op_8428_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_803_cast_fp16 = mul(x = var_8427_cast_fp16, y = var_8428_to_fp16)[name = tensor<string, []>("aw_chunk_803_cast_fp16")];
+            tensor<string, []> var_8431_equation_0 = const()[name = tensor<string, []>("op_8431_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8431_cast_fp16 = einsum(equation = var_8431_equation_0, values = (var_8281_cast_fp16, var_8057_cast_fp16))[name = tensor<string, []>("op_8431_cast_fp16")];
+            tensor<fp16, []> var_8432_to_fp16 = const()[name = tensor<string, []>("op_8432_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_805_cast_fp16 = mul(x = var_8431_cast_fp16, y = var_8432_to_fp16)[name = tensor<string, []>("aw_chunk_805_cast_fp16")];
+            tensor<string, []> var_8435_equation_0 = const()[name = tensor<string, []>("op_8435_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8435_cast_fp16 = einsum(equation = var_8435_equation_0, values = (var_8281_cast_fp16, var_8064_cast_fp16))[name = tensor<string, []>("op_8435_cast_fp16")];
+            tensor<fp16, []> var_8436_to_fp16 = const()[name = tensor<string, []>("op_8436_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_807_cast_fp16 = mul(x = var_8435_cast_fp16, y = var_8436_to_fp16)[name = tensor<string, []>("aw_chunk_807_cast_fp16")];
+            tensor<string, []> var_8439_equation_0 = const()[name = tensor<string, []>("op_8439_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8439_cast_fp16 = einsum(equation = var_8439_equation_0, values = (var_8285_cast_fp16, var_8071_cast_fp16))[name = tensor<string, []>("op_8439_cast_fp16")];
+            tensor<fp16, []> var_8440_to_fp16 = const()[name = tensor<string, []>("op_8440_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_809_cast_fp16 = mul(x = var_8439_cast_fp16, y = var_8440_to_fp16)[name = tensor<string, []>("aw_chunk_809_cast_fp16")];
+            tensor<string, []> var_8443_equation_0 = const()[name = tensor<string, []>("op_8443_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8443_cast_fp16 = einsum(equation = var_8443_equation_0, values = (var_8285_cast_fp16, var_8078_cast_fp16))[name = tensor<string, []>("op_8443_cast_fp16")];
+            tensor<fp16, []> var_8444_to_fp16 = const()[name = tensor<string, []>("op_8444_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_811_cast_fp16 = mul(x = var_8443_cast_fp16, y = var_8444_to_fp16)[name = tensor<string, []>("aw_chunk_811_cast_fp16")];
+            tensor<string, []> var_8447_equation_0 = const()[name = tensor<string, []>("op_8447_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8447_cast_fp16 = einsum(equation = var_8447_equation_0, values = (var_8285_cast_fp16, var_8085_cast_fp16))[name = tensor<string, []>("op_8447_cast_fp16")];
+            tensor<fp16, []> var_8448_to_fp16 = const()[name = tensor<string, []>("op_8448_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_813_cast_fp16 = mul(x = var_8447_cast_fp16, y = var_8448_to_fp16)[name = tensor<string, []>("aw_chunk_813_cast_fp16")];
+            tensor<string, []> var_8451_equation_0 = const()[name = tensor<string, []>("op_8451_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8451_cast_fp16 = einsum(equation = var_8451_equation_0, values = (var_8285_cast_fp16, var_8092_cast_fp16))[name = tensor<string, []>("op_8451_cast_fp16")];
+            tensor<fp16, []> var_8452_to_fp16 = const()[name = tensor<string, []>("op_8452_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_815_cast_fp16 = mul(x = var_8451_cast_fp16, y = var_8452_to_fp16)[name = tensor<string, []>("aw_chunk_815_cast_fp16")];
+            tensor<string, []> var_8455_equation_0 = const()[name = tensor<string, []>("op_8455_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8455_cast_fp16 = einsum(equation = var_8455_equation_0, values = (var_8289_cast_fp16, var_8099_cast_fp16))[name = tensor<string, []>("op_8455_cast_fp16")];
+            tensor<fp16, []> var_8456_to_fp16 = const()[name = tensor<string, []>("op_8456_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_817_cast_fp16 = mul(x = var_8455_cast_fp16, y = var_8456_to_fp16)[name = tensor<string, []>("aw_chunk_817_cast_fp16")];
+            tensor<string, []> var_8459_equation_0 = const()[name = tensor<string, []>("op_8459_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8459_cast_fp16 = einsum(equation = var_8459_equation_0, values = (var_8289_cast_fp16, var_8106_cast_fp16))[name = tensor<string, []>("op_8459_cast_fp16")];
+            tensor<fp16, []> var_8460_to_fp16 = const()[name = tensor<string, []>("op_8460_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_819_cast_fp16 = mul(x = var_8459_cast_fp16, y = var_8460_to_fp16)[name = tensor<string, []>("aw_chunk_819_cast_fp16")];
+            tensor<string, []> var_8463_equation_0 = const()[name = tensor<string, []>("op_8463_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8463_cast_fp16 = einsum(equation = var_8463_equation_0, values = (var_8289_cast_fp16, var_8113_cast_fp16))[name = tensor<string, []>("op_8463_cast_fp16")];
+            tensor<fp16, []> var_8464_to_fp16 = const()[name = tensor<string, []>("op_8464_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_821_cast_fp16 = mul(x = var_8463_cast_fp16, y = var_8464_to_fp16)[name = tensor<string, []>("aw_chunk_821_cast_fp16")];
+            tensor<string, []> var_8467_equation_0 = const()[name = tensor<string, []>("op_8467_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8467_cast_fp16 = einsum(equation = var_8467_equation_0, values = (var_8289_cast_fp16, var_8120_cast_fp16))[name = tensor<string, []>("op_8467_cast_fp16")];
+            tensor<fp16, []> var_8468_to_fp16 = const()[name = tensor<string, []>("op_8468_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_823_cast_fp16 = mul(x = var_8467_cast_fp16, y = var_8468_to_fp16)[name = tensor<string, []>("aw_chunk_823_cast_fp16")];
+            tensor<string, []> var_8471_equation_0 = const()[name = tensor<string, []>("op_8471_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8471_cast_fp16 = einsum(equation = var_8471_equation_0, values = (var_8293_cast_fp16, var_8127_cast_fp16))[name = tensor<string, []>("op_8471_cast_fp16")];
+            tensor<fp16, []> var_8472_to_fp16 = const()[name = tensor<string, []>("op_8472_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_825_cast_fp16 = mul(x = var_8471_cast_fp16, y = var_8472_to_fp16)[name = tensor<string, []>("aw_chunk_825_cast_fp16")];
+            tensor<string, []> var_8475_equation_0 = const()[name = tensor<string, []>("op_8475_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8475_cast_fp16 = einsum(equation = var_8475_equation_0, values = (var_8293_cast_fp16, var_8134_cast_fp16))[name = tensor<string, []>("op_8475_cast_fp16")];
+            tensor<fp16, []> var_8476_to_fp16 = const()[name = tensor<string, []>("op_8476_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_827_cast_fp16 = mul(x = var_8475_cast_fp16, y = var_8476_to_fp16)[name = tensor<string, []>("aw_chunk_827_cast_fp16")];
+            tensor<string, []> var_8479_equation_0 = const()[name = tensor<string, []>("op_8479_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8479_cast_fp16 = einsum(equation = var_8479_equation_0, values = (var_8293_cast_fp16, var_8141_cast_fp16))[name = tensor<string, []>("op_8479_cast_fp16")];
+            tensor<fp16, []> var_8480_to_fp16 = const()[name = tensor<string, []>("op_8480_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_829_cast_fp16 = mul(x = var_8479_cast_fp16, y = var_8480_to_fp16)[name = tensor<string, []>("aw_chunk_829_cast_fp16")];
+            tensor<string, []> var_8483_equation_0 = const()[name = tensor<string, []>("op_8483_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8483_cast_fp16 = einsum(equation = var_8483_equation_0, values = (var_8293_cast_fp16, var_8148_cast_fp16))[name = tensor<string, []>("op_8483_cast_fp16")];
+            tensor<fp16, []> var_8484_to_fp16 = const()[name = tensor<string, []>("op_8484_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_831_cast_fp16 = mul(x = var_8483_cast_fp16, y = var_8484_to_fp16)[name = tensor<string, []>("aw_chunk_831_cast_fp16")];
+            tensor<string, []> var_8487_equation_0 = const()[name = tensor<string, []>("op_8487_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8487_cast_fp16 = einsum(equation = var_8487_equation_0, values = (var_8297_cast_fp16, var_8155_cast_fp16))[name = tensor<string, []>("op_8487_cast_fp16")];
+            tensor<fp16, []> var_8488_to_fp16 = const()[name = tensor<string, []>("op_8488_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_833_cast_fp16 = mul(x = var_8487_cast_fp16, y = var_8488_to_fp16)[name = tensor<string, []>("aw_chunk_833_cast_fp16")];
+            tensor<string, []> var_8491_equation_0 = const()[name = tensor<string, []>("op_8491_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8491_cast_fp16 = einsum(equation = var_8491_equation_0, values = (var_8297_cast_fp16, var_8162_cast_fp16))[name = tensor<string, []>("op_8491_cast_fp16")];
+            tensor<fp16, []> var_8492_to_fp16 = const()[name = tensor<string, []>("op_8492_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_835_cast_fp16 = mul(x = var_8491_cast_fp16, y = var_8492_to_fp16)[name = tensor<string, []>("aw_chunk_835_cast_fp16")];
+            tensor<string, []> var_8495_equation_0 = const()[name = tensor<string, []>("op_8495_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8495_cast_fp16 = einsum(equation = var_8495_equation_0, values = (var_8297_cast_fp16, var_8169_cast_fp16))[name = tensor<string, []>("op_8495_cast_fp16")];
+            tensor<fp16, []> var_8496_to_fp16 = const()[name = tensor<string, []>("op_8496_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_837_cast_fp16 = mul(x = var_8495_cast_fp16, y = var_8496_to_fp16)[name = tensor<string, []>("aw_chunk_837_cast_fp16")];
+            tensor<string, []> var_8499_equation_0 = const()[name = tensor<string, []>("op_8499_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8499_cast_fp16 = einsum(equation = var_8499_equation_0, values = (var_8297_cast_fp16, var_8176_cast_fp16))[name = tensor<string, []>("op_8499_cast_fp16")];
+            tensor<fp16, []> var_8500_to_fp16 = const()[name = tensor<string, []>("op_8500_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_839_cast_fp16 = mul(x = var_8499_cast_fp16, y = var_8500_to_fp16)[name = tensor<string, []>("aw_chunk_839_cast_fp16")];
+            tensor<string, []> var_8503_equation_0 = const()[name = tensor<string, []>("op_8503_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8503_cast_fp16 = einsum(equation = var_8503_equation_0, values = (var_8301_cast_fp16, var_8183_cast_fp16))[name = tensor<string, []>("op_8503_cast_fp16")];
+            tensor<fp16, []> var_8504_to_fp16 = const()[name = tensor<string, []>("op_8504_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_841_cast_fp16 = mul(x = var_8503_cast_fp16, y = var_8504_to_fp16)[name = tensor<string, []>("aw_chunk_841_cast_fp16")];
+            tensor<string, []> var_8507_equation_0 = const()[name = tensor<string, []>("op_8507_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8507_cast_fp16 = einsum(equation = var_8507_equation_0, values = (var_8301_cast_fp16, var_8190_cast_fp16))[name = tensor<string, []>("op_8507_cast_fp16")];
+            tensor<fp16, []> var_8508_to_fp16 = const()[name = tensor<string, []>("op_8508_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_843_cast_fp16 = mul(x = var_8507_cast_fp16, y = var_8508_to_fp16)[name = tensor<string, []>("aw_chunk_843_cast_fp16")];
+            tensor<string, []> var_8511_equation_0 = const()[name = tensor<string, []>("op_8511_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8511_cast_fp16 = einsum(equation = var_8511_equation_0, values = (var_8301_cast_fp16, var_8197_cast_fp16))[name = tensor<string, []>("op_8511_cast_fp16")];
+            tensor<fp16, []> var_8512_to_fp16 = const()[name = tensor<string, []>("op_8512_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_845_cast_fp16 = mul(x = var_8511_cast_fp16, y = var_8512_to_fp16)[name = tensor<string, []>("aw_chunk_845_cast_fp16")];
+            tensor<string, []> var_8515_equation_0 = const()[name = tensor<string, []>("op_8515_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8515_cast_fp16 = einsum(equation = var_8515_equation_0, values = (var_8301_cast_fp16, var_8204_cast_fp16))[name = tensor<string, []>("op_8515_cast_fp16")];
+            tensor<fp16, []> var_8516_to_fp16 = const()[name = tensor<string, []>("op_8516_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_847_cast_fp16 = mul(x = var_8515_cast_fp16, y = var_8516_to_fp16)[name = tensor<string, []>("aw_chunk_847_cast_fp16")];
+            tensor<string, []> var_8519_equation_0 = const()[name = tensor<string, []>("op_8519_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8519_cast_fp16 = einsum(equation = var_8519_equation_0, values = (var_8305_cast_fp16, var_8211_cast_fp16))[name = tensor<string, []>("op_8519_cast_fp16")];
+            tensor<fp16, []> var_8520_to_fp16 = const()[name = tensor<string, []>("op_8520_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_849_cast_fp16 = mul(x = var_8519_cast_fp16, y = var_8520_to_fp16)[name = tensor<string, []>("aw_chunk_849_cast_fp16")];
+            tensor<string, []> var_8523_equation_0 = const()[name = tensor<string, []>("op_8523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8523_cast_fp16 = einsum(equation = var_8523_equation_0, values = (var_8305_cast_fp16, var_8218_cast_fp16))[name = tensor<string, []>("op_8523_cast_fp16")];
+            tensor<fp16, []> var_8524_to_fp16 = const()[name = tensor<string, []>("op_8524_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_851_cast_fp16 = mul(x = var_8523_cast_fp16, y = var_8524_to_fp16)[name = tensor<string, []>("aw_chunk_851_cast_fp16")];
+            tensor<string, []> var_8527_equation_0 = const()[name = tensor<string, []>("op_8527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8527_cast_fp16 = einsum(equation = var_8527_equation_0, values = (var_8305_cast_fp16, var_8225_cast_fp16))[name = tensor<string, []>("op_8527_cast_fp16")];
+            tensor<fp16, []> var_8528_to_fp16 = const()[name = tensor<string, []>("op_8528_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_853_cast_fp16 = mul(x = var_8527_cast_fp16, y = var_8528_to_fp16)[name = tensor<string, []>("aw_chunk_853_cast_fp16")];
+            tensor<string, []> var_8531_equation_0 = const()[name = tensor<string, []>("op_8531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8531_cast_fp16 = einsum(equation = var_8531_equation_0, values = (var_8305_cast_fp16, var_8232_cast_fp16))[name = tensor<string, []>("op_8531_cast_fp16")];
+            tensor<fp16, []> var_8532_to_fp16 = const()[name = tensor<string, []>("op_8532_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_855_cast_fp16 = mul(x = var_8531_cast_fp16, y = var_8532_to_fp16)[name = tensor<string, []>("aw_chunk_855_cast_fp16")];
+            tensor<string, []> var_8535_equation_0 = const()[name = tensor<string, []>("op_8535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8535_cast_fp16 = einsum(equation = var_8535_equation_0, values = (var_8309_cast_fp16, var_8239_cast_fp16))[name = tensor<string, []>("op_8535_cast_fp16")];
+            tensor<fp16, []> var_8536_to_fp16 = const()[name = tensor<string, []>("op_8536_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_857_cast_fp16 = mul(x = var_8535_cast_fp16, y = var_8536_to_fp16)[name = tensor<string, []>("aw_chunk_857_cast_fp16")];
+            tensor<string, []> var_8539_equation_0 = const()[name = tensor<string, []>("op_8539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8539_cast_fp16 = einsum(equation = var_8539_equation_0, values = (var_8309_cast_fp16, var_8246_cast_fp16))[name = tensor<string, []>("op_8539_cast_fp16")];
+            tensor<fp16, []> var_8540_to_fp16 = const()[name = tensor<string, []>("op_8540_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_859_cast_fp16 = mul(x = var_8539_cast_fp16, y = var_8540_to_fp16)[name = tensor<string, []>("aw_chunk_859_cast_fp16")];
+            tensor<string, []> var_8543_equation_0 = const()[name = tensor<string, []>("op_8543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8543_cast_fp16 = einsum(equation = var_8543_equation_0, values = (var_8309_cast_fp16, var_8253_cast_fp16))[name = tensor<string, []>("op_8543_cast_fp16")];
+            tensor<fp16, []> var_8544_to_fp16 = const()[name = tensor<string, []>("op_8544_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_861_cast_fp16 = mul(x = var_8543_cast_fp16, y = var_8544_to_fp16)[name = tensor<string, []>("aw_chunk_861_cast_fp16")];
+            tensor<string, []> var_8547_equation_0 = const()[name = tensor<string, []>("op_8547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8547_cast_fp16 = einsum(equation = var_8547_equation_0, values = (var_8309_cast_fp16, var_8260_cast_fp16))[name = tensor<string, []>("op_8547_cast_fp16")];
+            tensor<fp16, []> var_8548_to_fp16 = const()[name = tensor<string, []>("op_8548_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_863_cast_fp16 = mul(x = var_8547_cast_fp16, y = var_8548_to_fp16)[name = tensor<string, []>("aw_chunk_863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8550_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_769_cast_fp16)[name = tensor<string, []>("op_8550_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8551_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_771_cast_fp16)[name = tensor<string, []>("op_8551_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8552_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_773_cast_fp16)[name = tensor<string, []>("op_8552_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8553_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_775_cast_fp16)[name = tensor<string, []>("op_8553_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8554_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_777_cast_fp16)[name = tensor<string, []>("op_8554_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8555_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_779_cast_fp16)[name = tensor<string, []>("op_8555_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8556_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_781_cast_fp16)[name = tensor<string, []>("op_8556_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8557_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_783_cast_fp16)[name = tensor<string, []>("op_8557_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8558_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_785_cast_fp16)[name = tensor<string, []>("op_8558_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8559_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_787_cast_fp16)[name = tensor<string, []>("op_8559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8560_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_789_cast_fp16)[name = tensor<string, []>("op_8560_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8561_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_791_cast_fp16)[name = tensor<string, []>("op_8561_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8562_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_793_cast_fp16)[name = tensor<string, []>("op_8562_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8563_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_795_cast_fp16)[name = tensor<string, []>("op_8563_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8564_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_797_cast_fp16)[name = tensor<string, []>("op_8564_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8565_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_799_cast_fp16)[name = tensor<string, []>("op_8565_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8566_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_801_cast_fp16)[name = tensor<string, []>("op_8566_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8567_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_803_cast_fp16)[name = tensor<string, []>("op_8567_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8568_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_805_cast_fp16)[name = tensor<string, []>("op_8568_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8569_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_807_cast_fp16)[name = tensor<string, []>("op_8569_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8570_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_809_cast_fp16)[name = tensor<string, []>("op_8570_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8571_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_811_cast_fp16)[name = tensor<string, []>("op_8571_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8572_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_813_cast_fp16)[name = tensor<string, []>("op_8572_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8573_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_815_cast_fp16)[name = tensor<string, []>("op_8573_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8574_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_817_cast_fp16)[name = tensor<string, []>("op_8574_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8575_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_819_cast_fp16)[name = tensor<string, []>("op_8575_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8576_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_821_cast_fp16)[name = tensor<string, []>("op_8576_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8577_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_823_cast_fp16)[name = tensor<string, []>("op_8577_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8578_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_825_cast_fp16)[name = tensor<string, []>("op_8578_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8579_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_827_cast_fp16)[name = tensor<string, []>("op_8579_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8580_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_829_cast_fp16)[name = tensor<string, []>("op_8580_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8581_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_831_cast_fp16)[name = tensor<string, []>("op_8581_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8582_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_833_cast_fp16)[name = tensor<string, []>("op_8582_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8583_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_835_cast_fp16)[name = tensor<string, []>("op_8583_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8584_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_837_cast_fp16)[name = tensor<string, []>("op_8584_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8585_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_839_cast_fp16)[name = tensor<string, []>("op_8585_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8586_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_841_cast_fp16)[name = tensor<string, []>("op_8586_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8587_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_843_cast_fp16)[name = tensor<string, []>("op_8587_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8588_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_845_cast_fp16)[name = tensor<string, []>("op_8588_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8589_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_847_cast_fp16)[name = tensor<string, []>("op_8589_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8590_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_849_cast_fp16)[name = tensor<string, []>("op_8590_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8591_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_851_cast_fp16)[name = tensor<string, []>("op_8591_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8592_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_853_cast_fp16)[name = tensor<string, []>("op_8592_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8593_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_855_cast_fp16)[name = tensor<string, []>("op_8593_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8594_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_857_cast_fp16)[name = tensor<string, []>("op_8594_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8595_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_859_cast_fp16)[name = tensor<string, []>("op_8595_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8596_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_861_cast_fp16)[name = tensor<string, []>("op_8596_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_8597_cast_fp16 = softmax(axis = var_7823, x = aw_chunk_863_cast_fp16)[name = tensor<string, []>("op_8597_cast_fp16")];
+            tensor<string, []> var_8599_equation_0 = const()[name = tensor<string, []>("op_8599_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8599_cast_fp16 = einsum(equation = var_8599_equation_0, values = (var_8311_cast_fp16, var_8550_cast_fp16))[name = tensor<string, []>("op_8599_cast_fp16")];
+            tensor<string, []> var_8601_equation_0 = const()[name = tensor<string, []>("op_8601_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8601_cast_fp16 = einsum(equation = var_8601_equation_0, values = (var_8311_cast_fp16, var_8551_cast_fp16))[name = tensor<string, []>("op_8601_cast_fp16")];
+            tensor<string, []> var_8603_equation_0 = const()[name = tensor<string, []>("op_8603_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8603_cast_fp16 = einsum(equation = var_8603_equation_0, values = (var_8311_cast_fp16, var_8552_cast_fp16))[name = tensor<string, []>("op_8603_cast_fp16")];
+            tensor<string, []> var_8605_equation_0 = const()[name = tensor<string, []>("op_8605_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8605_cast_fp16 = einsum(equation = var_8605_equation_0, values = (var_8311_cast_fp16, var_8553_cast_fp16))[name = tensor<string, []>("op_8605_cast_fp16")];
+            tensor<string, []> var_8607_equation_0 = const()[name = tensor<string, []>("op_8607_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8607_cast_fp16 = einsum(equation = var_8607_equation_0, values = (var_8315_cast_fp16, var_8554_cast_fp16))[name = tensor<string, []>("op_8607_cast_fp16")];
+            tensor<string, []> var_8609_equation_0 = const()[name = tensor<string, []>("op_8609_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8609_cast_fp16 = einsum(equation = var_8609_equation_0, values = (var_8315_cast_fp16, var_8555_cast_fp16))[name = tensor<string, []>("op_8609_cast_fp16")];
+            tensor<string, []> var_8611_equation_0 = const()[name = tensor<string, []>("op_8611_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8611_cast_fp16 = einsum(equation = var_8611_equation_0, values = (var_8315_cast_fp16, var_8556_cast_fp16))[name = tensor<string, []>("op_8611_cast_fp16")];
+            tensor<string, []> var_8613_equation_0 = const()[name = tensor<string, []>("op_8613_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8613_cast_fp16 = einsum(equation = var_8613_equation_0, values = (var_8315_cast_fp16, var_8557_cast_fp16))[name = tensor<string, []>("op_8613_cast_fp16")];
+            tensor<string, []> var_8615_equation_0 = const()[name = tensor<string, []>("op_8615_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8615_cast_fp16 = einsum(equation = var_8615_equation_0, values = (var_8319_cast_fp16, var_8558_cast_fp16))[name = tensor<string, []>("op_8615_cast_fp16")];
+            tensor<string, []> var_8617_equation_0 = const()[name = tensor<string, []>("op_8617_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8617_cast_fp16 = einsum(equation = var_8617_equation_0, values = (var_8319_cast_fp16, var_8559_cast_fp16))[name = tensor<string, []>("op_8617_cast_fp16")];
+            tensor<string, []> var_8619_equation_0 = const()[name = tensor<string, []>("op_8619_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8619_cast_fp16 = einsum(equation = var_8619_equation_0, values = (var_8319_cast_fp16, var_8560_cast_fp16))[name = tensor<string, []>("op_8619_cast_fp16")];
+            tensor<string, []> var_8621_equation_0 = const()[name = tensor<string, []>("op_8621_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8621_cast_fp16 = einsum(equation = var_8621_equation_0, values = (var_8319_cast_fp16, var_8561_cast_fp16))[name = tensor<string, []>("op_8621_cast_fp16")];
+            tensor<string, []> var_8623_equation_0 = const()[name = tensor<string, []>("op_8623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8623_cast_fp16 = einsum(equation = var_8623_equation_0, values = (var_8323_cast_fp16, var_8562_cast_fp16))[name = tensor<string, []>("op_8623_cast_fp16")];
+            tensor<string, []> var_8625_equation_0 = const()[name = tensor<string, []>("op_8625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8625_cast_fp16 = einsum(equation = var_8625_equation_0, values = (var_8323_cast_fp16, var_8563_cast_fp16))[name = tensor<string, []>("op_8625_cast_fp16")];
+            tensor<string, []> var_8627_equation_0 = const()[name = tensor<string, []>("op_8627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8627_cast_fp16 = einsum(equation = var_8627_equation_0, values = (var_8323_cast_fp16, var_8564_cast_fp16))[name = tensor<string, []>("op_8627_cast_fp16")];
+            tensor<string, []> var_8629_equation_0 = const()[name = tensor<string, []>("op_8629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8629_cast_fp16 = einsum(equation = var_8629_equation_0, values = (var_8323_cast_fp16, var_8565_cast_fp16))[name = tensor<string, []>("op_8629_cast_fp16")];
+            tensor<string, []> var_8631_equation_0 = const()[name = tensor<string, []>("op_8631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8631_cast_fp16 = einsum(equation = var_8631_equation_0, values = (var_8327_cast_fp16, var_8566_cast_fp16))[name = tensor<string, []>("op_8631_cast_fp16")];
+            tensor<string, []> var_8633_equation_0 = const()[name = tensor<string, []>("op_8633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8633_cast_fp16 = einsum(equation = var_8633_equation_0, values = (var_8327_cast_fp16, var_8567_cast_fp16))[name = tensor<string, []>("op_8633_cast_fp16")];
+            tensor<string, []> var_8635_equation_0 = const()[name = tensor<string, []>("op_8635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8635_cast_fp16 = einsum(equation = var_8635_equation_0, values = (var_8327_cast_fp16, var_8568_cast_fp16))[name = tensor<string, []>("op_8635_cast_fp16")];
+            tensor<string, []> var_8637_equation_0 = const()[name = tensor<string, []>("op_8637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8637_cast_fp16 = einsum(equation = var_8637_equation_0, values = (var_8327_cast_fp16, var_8569_cast_fp16))[name = tensor<string, []>("op_8637_cast_fp16")];
+            tensor<string, []> var_8639_equation_0 = const()[name = tensor<string, []>("op_8639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8639_cast_fp16 = einsum(equation = var_8639_equation_0, values = (var_8331_cast_fp16, var_8570_cast_fp16))[name = tensor<string, []>("op_8639_cast_fp16")];
+            tensor<string, []> var_8641_equation_0 = const()[name = tensor<string, []>("op_8641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8641_cast_fp16 = einsum(equation = var_8641_equation_0, values = (var_8331_cast_fp16, var_8571_cast_fp16))[name = tensor<string, []>("op_8641_cast_fp16")];
+            tensor<string, []> var_8643_equation_0 = const()[name = tensor<string, []>("op_8643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8643_cast_fp16 = einsum(equation = var_8643_equation_0, values = (var_8331_cast_fp16, var_8572_cast_fp16))[name = tensor<string, []>("op_8643_cast_fp16")];
+            tensor<string, []> var_8645_equation_0 = const()[name = tensor<string, []>("op_8645_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8645_cast_fp16 = einsum(equation = var_8645_equation_0, values = (var_8331_cast_fp16, var_8573_cast_fp16))[name = tensor<string, []>("op_8645_cast_fp16")];
+            tensor<string, []> var_8647_equation_0 = const()[name = tensor<string, []>("op_8647_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8647_cast_fp16 = einsum(equation = var_8647_equation_0, values = (var_8335_cast_fp16, var_8574_cast_fp16))[name = tensor<string, []>("op_8647_cast_fp16")];
+            tensor<string, []> var_8649_equation_0 = const()[name = tensor<string, []>("op_8649_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8649_cast_fp16 = einsum(equation = var_8649_equation_0, values = (var_8335_cast_fp16, var_8575_cast_fp16))[name = tensor<string, []>("op_8649_cast_fp16")];
+            tensor<string, []> var_8651_equation_0 = const()[name = tensor<string, []>("op_8651_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8651_cast_fp16 = einsum(equation = var_8651_equation_0, values = (var_8335_cast_fp16, var_8576_cast_fp16))[name = tensor<string, []>("op_8651_cast_fp16")];
+            tensor<string, []> var_8653_equation_0 = const()[name = tensor<string, []>("op_8653_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8653_cast_fp16 = einsum(equation = var_8653_equation_0, values = (var_8335_cast_fp16, var_8577_cast_fp16))[name = tensor<string, []>("op_8653_cast_fp16")];
+            tensor<string, []> var_8655_equation_0 = const()[name = tensor<string, []>("op_8655_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8655_cast_fp16 = einsum(equation = var_8655_equation_0, values = (var_8339_cast_fp16, var_8578_cast_fp16))[name = tensor<string, []>("op_8655_cast_fp16")];
+            tensor<string, []> var_8657_equation_0 = const()[name = tensor<string, []>("op_8657_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8657_cast_fp16 = einsum(equation = var_8657_equation_0, values = (var_8339_cast_fp16, var_8579_cast_fp16))[name = tensor<string, []>("op_8657_cast_fp16")];
+            tensor<string, []> var_8659_equation_0 = const()[name = tensor<string, []>("op_8659_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8659_cast_fp16 = einsum(equation = var_8659_equation_0, values = (var_8339_cast_fp16, var_8580_cast_fp16))[name = tensor<string, []>("op_8659_cast_fp16")];
+            tensor<string, []> var_8661_equation_0 = const()[name = tensor<string, []>("op_8661_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8661_cast_fp16 = einsum(equation = var_8661_equation_0, values = (var_8339_cast_fp16, var_8581_cast_fp16))[name = tensor<string, []>("op_8661_cast_fp16")];
+            tensor<string, []> var_8663_equation_0 = const()[name = tensor<string, []>("op_8663_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8663_cast_fp16 = einsum(equation = var_8663_equation_0, values = (var_8343_cast_fp16, var_8582_cast_fp16))[name = tensor<string, []>("op_8663_cast_fp16")];
+            tensor<string, []> var_8665_equation_0 = const()[name = tensor<string, []>("op_8665_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8665_cast_fp16 = einsum(equation = var_8665_equation_0, values = (var_8343_cast_fp16, var_8583_cast_fp16))[name = tensor<string, []>("op_8665_cast_fp16")];
+            tensor<string, []> var_8667_equation_0 = const()[name = tensor<string, []>("op_8667_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8667_cast_fp16 = einsum(equation = var_8667_equation_0, values = (var_8343_cast_fp16, var_8584_cast_fp16))[name = tensor<string, []>("op_8667_cast_fp16")];
+            tensor<string, []> var_8669_equation_0 = const()[name = tensor<string, []>("op_8669_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8669_cast_fp16 = einsum(equation = var_8669_equation_0, values = (var_8343_cast_fp16, var_8585_cast_fp16))[name = tensor<string, []>("op_8669_cast_fp16")];
+            tensor<string, []> var_8671_equation_0 = const()[name = tensor<string, []>("op_8671_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8671_cast_fp16 = einsum(equation = var_8671_equation_0, values = (var_8347_cast_fp16, var_8586_cast_fp16))[name = tensor<string, []>("op_8671_cast_fp16")];
+            tensor<string, []> var_8673_equation_0 = const()[name = tensor<string, []>("op_8673_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8673_cast_fp16 = einsum(equation = var_8673_equation_0, values = (var_8347_cast_fp16, var_8587_cast_fp16))[name = tensor<string, []>("op_8673_cast_fp16")];
+            tensor<string, []> var_8675_equation_0 = const()[name = tensor<string, []>("op_8675_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8675_cast_fp16 = einsum(equation = var_8675_equation_0, values = (var_8347_cast_fp16, var_8588_cast_fp16))[name = tensor<string, []>("op_8675_cast_fp16")];
+            tensor<string, []> var_8677_equation_0 = const()[name = tensor<string, []>("op_8677_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8677_cast_fp16 = einsum(equation = var_8677_equation_0, values = (var_8347_cast_fp16, var_8589_cast_fp16))[name = tensor<string, []>("op_8677_cast_fp16")];
+            tensor<string, []> var_8679_equation_0 = const()[name = tensor<string, []>("op_8679_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8679_cast_fp16 = einsum(equation = var_8679_equation_0, values = (var_8351_cast_fp16, var_8590_cast_fp16))[name = tensor<string, []>("op_8679_cast_fp16")];
+            tensor<string, []> var_8681_equation_0 = const()[name = tensor<string, []>("op_8681_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8681_cast_fp16 = einsum(equation = var_8681_equation_0, values = (var_8351_cast_fp16, var_8591_cast_fp16))[name = tensor<string, []>("op_8681_cast_fp16")];
+            tensor<string, []> var_8683_equation_0 = const()[name = tensor<string, []>("op_8683_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8683_cast_fp16 = einsum(equation = var_8683_equation_0, values = (var_8351_cast_fp16, var_8592_cast_fp16))[name = tensor<string, []>("op_8683_cast_fp16")];
+            tensor<string, []> var_8685_equation_0 = const()[name = tensor<string, []>("op_8685_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8685_cast_fp16 = einsum(equation = var_8685_equation_0, values = (var_8351_cast_fp16, var_8593_cast_fp16))[name = tensor<string, []>("op_8685_cast_fp16")];
+            tensor<string, []> var_8687_equation_0 = const()[name = tensor<string, []>("op_8687_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8687_cast_fp16 = einsum(equation = var_8687_equation_0, values = (var_8355_cast_fp16, var_8594_cast_fp16))[name = tensor<string, []>("op_8687_cast_fp16")];
+            tensor<string, []> var_8689_equation_0 = const()[name = tensor<string, []>("op_8689_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8689_cast_fp16 = einsum(equation = var_8689_equation_0, values = (var_8355_cast_fp16, var_8595_cast_fp16))[name = tensor<string, []>("op_8689_cast_fp16")];
+            tensor<string, []> var_8691_equation_0 = const()[name = tensor<string, []>("op_8691_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8691_cast_fp16 = einsum(equation = var_8691_equation_0, values = (var_8355_cast_fp16, var_8596_cast_fp16))[name = tensor<string, []>("op_8691_cast_fp16")];
+            tensor<string, []> var_8693_equation_0 = const()[name = tensor<string, []>("op_8693_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_8693_cast_fp16 = einsum(equation = var_8693_equation_0, values = (var_8355_cast_fp16, var_8597_cast_fp16))[name = tensor<string, []>("op_8693_cast_fp16")];
+            tensor<bool, []> var_8695_interleave_0 = const()[name = tensor<string, []>("op_8695_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8695_cast_fp16 = concat(axis = var_7806, interleave = var_8695_interleave_0, values = (var_8599_cast_fp16, var_8601_cast_fp16, var_8603_cast_fp16, var_8605_cast_fp16))[name = tensor<string, []>("op_8695_cast_fp16")];
+            tensor<bool, []> var_8697_interleave_0 = const()[name = tensor<string, []>("op_8697_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8697_cast_fp16 = concat(axis = var_7806, interleave = var_8697_interleave_0, values = (var_8607_cast_fp16, var_8609_cast_fp16, var_8611_cast_fp16, var_8613_cast_fp16))[name = tensor<string, []>("op_8697_cast_fp16")];
+            tensor<bool, []> var_8699_interleave_0 = const()[name = tensor<string, []>("op_8699_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8699_cast_fp16 = concat(axis = var_7806, interleave = var_8699_interleave_0, values = (var_8615_cast_fp16, var_8617_cast_fp16, var_8619_cast_fp16, var_8621_cast_fp16))[name = tensor<string, []>("op_8699_cast_fp16")];
+            tensor<bool, []> var_8701_interleave_0 = const()[name = tensor<string, []>("op_8701_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8701_cast_fp16 = concat(axis = var_7806, interleave = var_8701_interleave_0, values = (var_8623_cast_fp16, var_8625_cast_fp16, var_8627_cast_fp16, var_8629_cast_fp16))[name = tensor<string, []>("op_8701_cast_fp16")];
+            tensor<bool, []> var_8703_interleave_0 = const()[name = tensor<string, []>("op_8703_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8703_cast_fp16 = concat(axis = var_7806, interleave = var_8703_interleave_0, values = (var_8631_cast_fp16, var_8633_cast_fp16, var_8635_cast_fp16, var_8637_cast_fp16))[name = tensor<string, []>("op_8703_cast_fp16")];
+            tensor<bool, []> var_8705_interleave_0 = const()[name = tensor<string, []>("op_8705_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8705_cast_fp16 = concat(axis = var_7806, interleave = var_8705_interleave_0, values = (var_8639_cast_fp16, var_8641_cast_fp16, var_8643_cast_fp16, var_8645_cast_fp16))[name = tensor<string, []>("op_8705_cast_fp16")];
+            tensor<bool, []> var_8707_interleave_0 = const()[name = tensor<string, []>("op_8707_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8707_cast_fp16 = concat(axis = var_7806, interleave = var_8707_interleave_0, values = (var_8647_cast_fp16, var_8649_cast_fp16, var_8651_cast_fp16, var_8653_cast_fp16))[name = tensor<string, []>("op_8707_cast_fp16")];
+            tensor<bool, []> var_8709_interleave_0 = const()[name = tensor<string, []>("op_8709_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8709_cast_fp16 = concat(axis = var_7806, interleave = var_8709_interleave_0, values = (var_8655_cast_fp16, var_8657_cast_fp16, var_8659_cast_fp16, var_8661_cast_fp16))[name = tensor<string, []>("op_8709_cast_fp16")];
+            tensor<bool, []> var_8711_interleave_0 = const()[name = tensor<string, []>("op_8711_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8711_cast_fp16 = concat(axis = var_7806, interleave = var_8711_interleave_0, values = (var_8663_cast_fp16, var_8665_cast_fp16, var_8667_cast_fp16, var_8669_cast_fp16))[name = tensor<string, []>("op_8711_cast_fp16")];
+            tensor<bool, []> var_8713_interleave_0 = const()[name = tensor<string, []>("op_8713_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8713_cast_fp16 = concat(axis = var_7806, interleave = var_8713_interleave_0, values = (var_8671_cast_fp16, var_8673_cast_fp16, var_8675_cast_fp16, var_8677_cast_fp16))[name = tensor<string, []>("op_8713_cast_fp16")];
+            tensor<bool, []> var_8715_interleave_0 = const()[name = tensor<string, []>("op_8715_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8715_cast_fp16 = concat(axis = var_7806, interleave = var_8715_interleave_0, values = (var_8679_cast_fp16, var_8681_cast_fp16, var_8683_cast_fp16, var_8685_cast_fp16))[name = tensor<string, []>("op_8715_cast_fp16")];
+            tensor<bool, []> var_8717_interleave_0 = const()[name = tensor<string, []>("op_8717_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8717_cast_fp16 = concat(axis = var_7806, interleave = var_8717_interleave_0, values = (var_8687_cast_fp16, var_8689_cast_fp16, var_8691_cast_fp16, var_8693_cast_fp16))[name = tensor<string, []>("op_8717_cast_fp16")];
+            tensor<bool, []> input_65_interleave_0 = const()[name = tensor<string, []>("input_65_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_65_cast_fp16 = concat(axis = var_7823, interleave = input_65_interleave_0, values = (var_8695_cast_fp16, var_8697_cast_fp16, var_8699_cast_fp16, var_8701_cast_fp16, var_8703_cast_fp16, var_8705_cast_fp16, var_8707_cast_fp16, var_8709_cast_fp16, var_8711_cast_fp16, var_8713_cast_fp16, var_8715_cast_fp16, var_8717_cast_fp16))[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<int32, [2]> var_8722 = const()[name = tensor<string, []>("op_8722"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_8724 = const()[name = tensor<string, []>("op_8724"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_35_pad_type_0 = const()[name = tensor<string, []>("obj_35_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_35_pad_0 = const()[name = tensor<string, []>("obj_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(123165120)))];
+            tensor<fp16, [768]> layers_8_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124344832)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_35_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_bias_to_fp16, dilations = var_8724, groups = var_7823, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = var_8722, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("obj_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> var_8730 = const()[name = tensor<string, []>("op_8730"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_35_cast_fp16 = reduce_mean(axes = var_8730, keep_dims = var_7824, x = inputs_35_cast_fp16)[name = tensor<string, []>("channels_mean_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_35_cast_fp16 = sub(x = inputs_35_cast_fp16, y = channels_mean_35_cast_fp16)[name = tensor<string, []>("zero_mean_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = zero_mean_35_cast_fp16)[name = tensor<string, []>("zero_mean_sq_35_cast_fp16")];
+            tensor<int32, [1]> var_8734 = const()[name = tensor<string, []>("op_8734"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_8735_cast_fp16 = reduce_mean(axes = var_8734, keep_dims = var_7824, x = zero_mean_sq_35_cast_fp16)[name = tensor<string, []>("op_8735_cast_fp16")];
+            tensor<fp16, []> var_8736_to_fp16 = const()[name = tensor<string, []>("op_8736_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_8737_cast_fp16 = add(x = var_8735_cast_fp16, y = var_8736_to_fp16)[name = tensor<string, []>("op_8737_cast_fp16")];
+            tensor<fp16, []> denom_35_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_35_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0_to_fp16, x = var_8737_cast_fp16)[name = tensor<string, []>("denom_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = denom_35_cast_fp16)[name = tensor<string, []>("out_35_cast_fp16")];
+            tensor<fp16, [768]> input_67_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_67_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124346432)))];
+            tensor<fp16, [768]> input_67_beta_0_to_fp16 = const()[name = tensor<string, []>("input_67_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124348032)))];
+            tensor<fp16, []> input_67_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_67_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<int32, [2]> var_8748 = const()[name = tensor<string, []>("op_8748"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_8750 = const()[name = tensor<string, []>("op_8750"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_69_pad_type_0 = const()[name = tensor<string, []>("input_69_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124349632)))];
+            tensor<fp16, [3072]> layers_8_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129068288)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_69_cast_fp16 = conv(bias = layers_8_fc1_bias_to_fp16, dilations = var_8750, groups = var_7823, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = var_8748, weight = layers_8_fc1_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<int32, [2]> var_8756 = const()[name = tensor<string, []>("op_8756"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_8758 = const()[name = tensor<string, []>("op_8758"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_21_pad_type_0 = const()[name = tensor<string, []>("hidden_states_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_21_pad_0 = const()[name = tensor<string, []>("hidden_states_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129074496)))];
+            tensor<fp16, [768]> layers_8_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133793152)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_21_cast_fp16 = conv(bias = layers_8_fc2_bias_to_fp16, dilations = var_8758, groups = var_7823, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = var_8756, weight = layers_8_fc2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_8765 = const()[name = tensor<string, []>("op_8765"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_8782 = const()[name = tensor<string, []>("op_8782"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_8783 = const()[name = tensor<string, []>("op_8783"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_8793 = const()[name = tensor<string, []>("op_8793"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_37_cast_fp16 = reduce_mean(axes = var_8793, keep_dims = var_8783, x = inputs_37_cast_fp16)[name = tensor<string, []>("channels_mean_37_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_37_cast_fp16 = sub(x = inputs_37_cast_fp16, y = channels_mean_37_cast_fp16)[name = tensor<string, []>("zero_mean_37_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = zero_mean_37_cast_fp16)[name = tensor<string, []>("zero_mean_sq_37_cast_fp16")];
+            tensor<int32, [1]> var_8797 = const()[name = tensor<string, []>("op_8797"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_8798_cast_fp16 = reduce_mean(axes = var_8797, keep_dims = var_8783, x = zero_mean_sq_37_cast_fp16)[name = tensor<string, []>("op_8798_cast_fp16")];
+            tensor<fp16, []> var_8799_to_fp16 = const()[name = tensor<string, []>("op_8799_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_8800_cast_fp16 = add(x = var_8798_cast_fp16, y = var_8799_to_fp16)[name = tensor<string, []>("op_8800_cast_fp16")];
+            tensor<fp16, []> denom_37_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_37_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0_to_fp16, x = var_8800_cast_fp16)[name = tensor<string, []>("denom_37_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = denom_37_cast_fp16)[name = tensor<string, []>("out_37_cast_fp16")];
+            tensor<fp16, [768]> obj_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_37_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133794752)))];
+            tensor<fp16, [768]> obj_37_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_37_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133796352)))];
+            tensor<fp16, []> obj_37_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_37_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor<string, []>("obj_37_cast_fp16")];
+            tensor<int32, [2]> var_8815 = const()[name = tensor<string, []>("op_8815"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_8817 = const()[name = tensor<string, []>("op_8817"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_19_pad_type_0 = const()[name = tensor<string, []>("query_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_19_pad_0 = const()[name = tensor<string, []>("query_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133797952)))];
+            tensor<fp16, [768]> layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134977664)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_19_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_bias_to_fp16, dilations = var_8817, groups = var_8782, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = var_8815, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor<string, []>("query_19_cast_fp16")];
+            tensor<int32, [2]> var_8821 = const()[name = tensor<string, []>("op_8821"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_8823 = const()[name = tensor<string, []>("op_8823"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_19_pad_type_0 = const()[name = tensor<string, []>("key_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_19_pad_0 = const()[name = tensor<string, []>("key_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134979264)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_19_cast_fp16 = conv(dilations = var_8823, groups = var_8782, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = var_8821, weight = layers_9_self_attn_k_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor<string, []>("key_19_cast_fp16")];
+            tensor<int32, [2]> var_8828 = const()[name = tensor<string, []>("op_8828"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_8830 = const()[name = tensor<string, []>("op_8830"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_19_pad_type_0 = const()[name = tensor<string, []>("value_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_19_pad_0 = const()[name = tensor<string, []>("value_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(136158976)))];
+            tensor<fp16, [768]> layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137338688)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_19_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_bias_to_fp16, dilations = var_8830, groups = var_8782, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = var_8828, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor<string, []>("value_19_cast_fp16")];
+            tensor<int32, [4]> var_8837_begin_0 = const()[name = tensor<string, []>("op_8837_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8837_end_0 = const()[name = tensor<string, []>("op_8837_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8837_end_mask_0 = const()[name = tensor<string, []>("op_8837_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8837_cast_fp16 = slice_by_index(begin = var_8837_begin_0, end = var_8837_end_0, end_mask = var_8837_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8837_cast_fp16")];
+            tensor<int32, [4]> var_8841_begin_0 = const()[name = tensor<string, []>("op_8841_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_8841_end_0 = const()[name = tensor<string, []>("op_8841_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_8841_end_mask_0 = const()[name = tensor<string, []>("op_8841_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8841_cast_fp16 = slice_by_index(begin = var_8841_begin_0, end = var_8841_end_0, end_mask = var_8841_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8841_cast_fp16")];
+            tensor<int32, [4]> var_8845_begin_0 = const()[name = tensor<string, []>("op_8845_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_8845_end_0 = const()[name = tensor<string, []>("op_8845_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_8845_end_mask_0 = const()[name = tensor<string, []>("op_8845_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8845_cast_fp16 = slice_by_index(begin = var_8845_begin_0, end = var_8845_end_0, end_mask = var_8845_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8845_cast_fp16")];
+            tensor<int32, [4]> var_8849_begin_0 = const()[name = tensor<string, []>("op_8849_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_8849_end_0 = const()[name = tensor<string, []>("op_8849_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_8849_end_mask_0 = const()[name = tensor<string, []>("op_8849_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8849_cast_fp16 = slice_by_index(begin = var_8849_begin_0, end = var_8849_end_0, end_mask = var_8849_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8849_cast_fp16")];
+            tensor<int32, [4]> var_8853_begin_0 = const()[name = tensor<string, []>("op_8853_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_8853_end_0 = const()[name = tensor<string, []>("op_8853_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_8853_end_mask_0 = const()[name = tensor<string, []>("op_8853_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8853_cast_fp16 = slice_by_index(begin = var_8853_begin_0, end = var_8853_end_0, end_mask = var_8853_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8853_cast_fp16")];
+            tensor<int32, [4]> var_8857_begin_0 = const()[name = tensor<string, []>("op_8857_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_8857_end_0 = const()[name = tensor<string, []>("op_8857_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_8857_end_mask_0 = const()[name = tensor<string, []>("op_8857_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8857_cast_fp16 = slice_by_index(begin = var_8857_begin_0, end = var_8857_end_0, end_mask = var_8857_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8857_cast_fp16")];
+            tensor<int32, [4]> var_8861_begin_0 = const()[name = tensor<string, []>("op_8861_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_8861_end_0 = const()[name = tensor<string, []>("op_8861_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_8861_end_mask_0 = const()[name = tensor<string, []>("op_8861_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8861_cast_fp16 = slice_by_index(begin = var_8861_begin_0, end = var_8861_end_0, end_mask = var_8861_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8861_cast_fp16")];
+            tensor<int32, [4]> var_8865_begin_0 = const()[name = tensor<string, []>("op_8865_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_8865_end_0 = const()[name = tensor<string, []>("op_8865_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_8865_end_mask_0 = const()[name = tensor<string, []>("op_8865_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8865_cast_fp16 = slice_by_index(begin = var_8865_begin_0, end = var_8865_end_0, end_mask = var_8865_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8865_cast_fp16")];
+            tensor<int32, [4]> var_8869_begin_0 = const()[name = tensor<string, []>("op_8869_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_8869_end_0 = const()[name = tensor<string, []>("op_8869_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_8869_end_mask_0 = const()[name = tensor<string, []>("op_8869_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8869_cast_fp16 = slice_by_index(begin = var_8869_begin_0, end = var_8869_end_0, end_mask = var_8869_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8869_cast_fp16")];
+            tensor<int32, [4]> var_8873_begin_0 = const()[name = tensor<string, []>("op_8873_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_8873_end_0 = const()[name = tensor<string, []>("op_8873_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_8873_end_mask_0 = const()[name = tensor<string, []>("op_8873_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8873_cast_fp16 = slice_by_index(begin = var_8873_begin_0, end = var_8873_end_0, end_mask = var_8873_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8873_cast_fp16")];
+            tensor<int32, [4]> var_8877_begin_0 = const()[name = tensor<string, []>("op_8877_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_8877_end_0 = const()[name = tensor<string, []>("op_8877_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_8877_end_mask_0 = const()[name = tensor<string, []>("op_8877_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8877_cast_fp16 = slice_by_index(begin = var_8877_begin_0, end = var_8877_end_0, end_mask = var_8877_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8877_cast_fp16")];
+            tensor<int32, [4]> var_8881_begin_0 = const()[name = tensor<string, []>("op_8881_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_8881_end_0 = const()[name = tensor<string, []>("op_8881_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_8881_end_mask_0 = const()[name = tensor<string, []>("op_8881_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_8881_cast_fp16 = slice_by_index(begin = var_8881_begin_0, end = var_8881_end_0, end_mask = var_8881_end_mask_0, x = query_19_cast_fp16)[name = tensor<string, []>("op_8881_cast_fp16")];
+            tensor<int32, [4]> var_8890_begin_0 = const()[name = tensor<string, []>("op_8890_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8890_end_0 = const()[name = tensor<string, []>("op_8890_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8890_end_mask_0 = const()[name = tensor<string, []>("op_8890_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8890_cast_fp16 = slice_by_index(begin = var_8890_begin_0, end = var_8890_end_0, end_mask = var_8890_end_mask_0, x = var_8837_cast_fp16)[name = tensor<string, []>("op_8890_cast_fp16")];
+            tensor<int32, [4]> var_8897_begin_0 = const()[name = tensor<string, []>("op_8897_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8897_end_0 = const()[name = tensor<string, []>("op_8897_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8897_end_mask_0 = const()[name = tensor<string, []>("op_8897_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8897_cast_fp16 = slice_by_index(begin = var_8897_begin_0, end = var_8897_end_0, end_mask = var_8897_end_mask_0, x = var_8837_cast_fp16)[name = tensor<string, []>("op_8897_cast_fp16")];
+            tensor<int32, [4]> var_8904_begin_0 = const()[name = tensor<string, []>("op_8904_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8904_end_0 = const()[name = tensor<string, []>("op_8904_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8904_end_mask_0 = const()[name = tensor<string, []>("op_8904_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8904_cast_fp16 = slice_by_index(begin = var_8904_begin_0, end = var_8904_end_0, end_mask = var_8904_end_mask_0, x = var_8837_cast_fp16)[name = tensor<string, []>("op_8904_cast_fp16")];
+            tensor<int32, [4]> var_8911_begin_0 = const()[name = tensor<string, []>("op_8911_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8911_end_0 = const()[name = tensor<string, []>("op_8911_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8911_end_mask_0 = const()[name = tensor<string, []>("op_8911_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8911_cast_fp16 = slice_by_index(begin = var_8911_begin_0, end = var_8911_end_0, end_mask = var_8911_end_mask_0, x = var_8837_cast_fp16)[name = tensor<string, []>("op_8911_cast_fp16")];
+            tensor<int32, [4]> var_8918_begin_0 = const()[name = tensor<string, []>("op_8918_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8918_end_0 = const()[name = tensor<string, []>("op_8918_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8918_end_mask_0 = const()[name = tensor<string, []>("op_8918_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8918_cast_fp16 = slice_by_index(begin = var_8918_begin_0, end = var_8918_end_0, end_mask = var_8918_end_mask_0, x = var_8841_cast_fp16)[name = tensor<string, []>("op_8918_cast_fp16")];
+            tensor<int32, [4]> var_8925_begin_0 = const()[name = tensor<string, []>("op_8925_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8925_end_0 = const()[name = tensor<string, []>("op_8925_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8925_end_mask_0 = const()[name = tensor<string, []>("op_8925_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8925_cast_fp16 = slice_by_index(begin = var_8925_begin_0, end = var_8925_end_0, end_mask = var_8925_end_mask_0, x = var_8841_cast_fp16)[name = tensor<string, []>("op_8925_cast_fp16")];
+            tensor<int32, [4]> var_8932_begin_0 = const()[name = tensor<string, []>("op_8932_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8932_end_0 = const()[name = tensor<string, []>("op_8932_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8932_end_mask_0 = const()[name = tensor<string, []>("op_8932_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8932_cast_fp16 = slice_by_index(begin = var_8932_begin_0, end = var_8932_end_0, end_mask = var_8932_end_mask_0, x = var_8841_cast_fp16)[name = tensor<string, []>("op_8932_cast_fp16")];
+            tensor<int32, [4]> var_8939_begin_0 = const()[name = tensor<string, []>("op_8939_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8939_end_0 = const()[name = tensor<string, []>("op_8939_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8939_end_mask_0 = const()[name = tensor<string, []>("op_8939_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8939_cast_fp16 = slice_by_index(begin = var_8939_begin_0, end = var_8939_end_0, end_mask = var_8939_end_mask_0, x = var_8841_cast_fp16)[name = tensor<string, []>("op_8939_cast_fp16")];
+            tensor<int32, [4]> var_8946_begin_0 = const()[name = tensor<string, []>("op_8946_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8946_end_0 = const()[name = tensor<string, []>("op_8946_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8946_end_mask_0 = const()[name = tensor<string, []>("op_8946_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8946_cast_fp16 = slice_by_index(begin = var_8946_begin_0, end = var_8946_end_0, end_mask = var_8946_end_mask_0, x = var_8845_cast_fp16)[name = tensor<string, []>("op_8946_cast_fp16")];
+            tensor<int32, [4]> var_8953_begin_0 = const()[name = tensor<string, []>("op_8953_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8953_end_0 = const()[name = tensor<string, []>("op_8953_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8953_end_mask_0 = const()[name = tensor<string, []>("op_8953_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8953_cast_fp16 = slice_by_index(begin = var_8953_begin_0, end = var_8953_end_0, end_mask = var_8953_end_mask_0, x = var_8845_cast_fp16)[name = tensor<string, []>("op_8953_cast_fp16")];
+            tensor<int32, [4]> var_8960_begin_0 = const()[name = tensor<string, []>("op_8960_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8960_end_0 = const()[name = tensor<string, []>("op_8960_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8960_end_mask_0 = const()[name = tensor<string, []>("op_8960_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8960_cast_fp16 = slice_by_index(begin = var_8960_begin_0, end = var_8960_end_0, end_mask = var_8960_end_mask_0, x = var_8845_cast_fp16)[name = tensor<string, []>("op_8960_cast_fp16")];
+            tensor<int32, [4]> var_8967_begin_0 = const()[name = tensor<string, []>("op_8967_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8967_end_0 = const()[name = tensor<string, []>("op_8967_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8967_end_mask_0 = const()[name = tensor<string, []>("op_8967_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8967_cast_fp16 = slice_by_index(begin = var_8967_begin_0, end = var_8967_end_0, end_mask = var_8967_end_mask_0, x = var_8845_cast_fp16)[name = tensor<string, []>("op_8967_cast_fp16")];
+            tensor<int32, [4]> var_8974_begin_0 = const()[name = tensor<string, []>("op_8974_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_8974_end_0 = const()[name = tensor<string, []>("op_8974_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_8974_end_mask_0 = const()[name = tensor<string, []>("op_8974_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8974_cast_fp16 = slice_by_index(begin = var_8974_begin_0, end = var_8974_end_0, end_mask = var_8974_end_mask_0, x = var_8849_cast_fp16)[name = tensor<string, []>("op_8974_cast_fp16")];
+            tensor<int32, [4]> var_8981_begin_0 = const()[name = tensor<string, []>("op_8981_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_8981_end_0 = const()[name = tensor<string, []>("op_8981_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_8981_end_mask_0 = const()[name = tensor<string, []>("op_8981_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8981_cast_fp16 = slice_by_index(begin = var_8981_begin_0, end = var_8981_end_0, end_mask = var_8981_end_mask_0, x = var_8849_cast_fp16)[name = tensor<string, []>("op_8981_cast_fp16")];
+            tensor<int32, [4]> var_8988_begin_0 = const()[name = tensor<string, []>("op_8988_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_8988_end_0 = const()[name = tensor<string, []>("op_8988_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_8988_end_mask_0 = const()[name = tensor<string, []>("op_8988_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8988_cast_fp16 = slice_by_index(begin = var_8988_begin_0, end = var_8988_end_0, end_mask = var_8988_end_mask_0, x = var_8849_cast_fp16)[name = tensor<string, []>("op_8988_cast_fp16")];
+            tensor<int32, [4]> var_8995_begin_0 = const()[name = tensor<string, []>("op_8995_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_8995_end_0 = const()[name = tensor<string, []>("op_8995_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_8995_end_mask_0 = const()[name = tensor<string, []>("op_8995_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_8995_cast_fp16 = slice_by_index(begin = var_8995_begin_0, end = var_8995_end_0, end_mask = var_8995_end_mask_0, x = var_8849_cast_fp16)[name = tensor<string, []>("op_8995_cast_fp16")];
+            tensor<int32, [4]> var_9002_begin_0 = const()[name = tensor<string, []>("op_9002_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9002_end_0 = const()[name = tensor<string, []>("op_9002_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9002_end_mask_0 = const()[name = tensor<string, []>("op_9002_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9002_cast_fp16 = slice_by_index(begin = var_9002_begin_0, end = var_9002_end_0, end_mask = var_9002_end_mask_0, x = var_8853_cast_fp16)[name = tensor<string, []>("op_9002_cast_fp16")];
+            tensor<int32, [4]> var_9009_begin_0 = const()[name = tensor<string, []>("op_9009_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9009_end_0 = const()[name = tensor<string, []>("op_9009_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9009_end_mask_0 = const()[name = tensor<string, []>("op_9009_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9009_cast_fp16 = slice_by_index(begin = var_9009_begin_0, end = var_9009_end_0, end_mask = var_9009_end_mask_0, x = var_8853_cast_fp16)[name = tensor<string, []>("op_9009_cast_fp16")];
+            tensor<int32, [4]> var_9016_begin_0 = const()[name = tensor<string, []>("op_9016_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9016_end_0 = const()[name = tensor<string, []>("op_9016_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9016_end_mask_0 = const()[name = tensor<string, []>("op_9016_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9016_cast_fp16 = slice_by_index(begin = var_9016_begin_0, end = var_9016_end_0, end_mask = var_9016_end_mask_0, x = var_8853_cast_fp16)[name = tensor<string, []>("op_9016_cast_fp16")];
+            tensor<int32, [4]> var_9023_begin_0 = const()[name = tensor<string, []>("op_9023_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9023_end_0 = const()[name = tensor<string, []>("op_9023_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9023_end_mask_0 = const()[name = tensor<string, []>("op_9023_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9023_cast_fp16 = slice_by_index(begin = var_9023_begin_0, end = var_9023_end_0, end_mask = var_9023_end_mask_0, x = var_8853_cast_fp16)[name = tensor<string, []>("op_9023_cast_fp16")];
+            tensor<int32, [4]> var_9030_begin_0 = const()[name = tensor<string, []>("op_9030_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9030_end_0 = const()[name = tensor<string, []>("op_9030_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9030_end_mask_0 = const()[name = tensor<string, []>("op_9030_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9030_cast_fp16 = slice_by_index(begin = var_9030_begin_0, end = var_9030_end_0, end_mask = var_9030_end_mask_0, x = var_8857_cast_fp16)[name = tensor<string, []>("op_9030_cast_fp16")];
+            tensor<int32, [4]> var_9037_begin_0 = const()[name = tensor<string, []>("op_9037_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9037_end_0 = const()[name = tensor<string, []>("op_9037_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9037_end_mask_0 = const()[name = tensor<string, []>("op_9037_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9037_cast_fp16 = slice_by_index(begin = var_9037_begin_0, end = var_9037_end_0, end_mask = var_9037_end_mask_0, x = var_8857_cast_fp16)[name = tensor<string, []>("op_9037_cast_fp16")];
+            tensor<int32, [4]> var_9044_begin_0 = const()[name = tensor<string, []>("op_9044_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9044_end_0 = const()[name = tensor<string, []>("op_9044_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9044_end_mask_0 = const()[name = tensor<string, []>("op_9044_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9044_cast_fp16 = slice_by_index(begin = var_9044_begin_0, end = var_9044_end_0, end_mask = var_9044_end_mask_0, x = var_8857_cast_fp16)[name = tensor<string, []>("op_9044_cast_fp16")];
+            tensor<int32, [4]> var_9051_begin_0 = const()[name = tensor<string, []>("op_9051_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9051_end_0 = const()[name = tensor<string, []>("op_9051_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9051_end_mask_0 = const()[name = tensor<string, []>("op_9051_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9051_cast_fp16 = slice_by_index(begin = var_9051_begin_0, end = var_9051_end_0, end_mask = var_9051_end_mask_0, x = var_8857_cast_fp16)[name = tensor<string, []>("op_9051_cast_fp16")];
+            tensor<int32, [4]> var_9058_begin_0 = const()[name = tensor<string, []>("op_9058_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9058_end_0 = const()[name = tensor<string, []>("op_9058_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9058_end_mask_0 = const()[name = tensor<string, []>("op_9058_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9058_cast_fp16 = slice_by_index(begin = var_9058_begin_0, end = var_9058_end_0, end_mask = var_9058_end_mask_0, x = var_8861_cast_fp16)[name = tensor<string, []>("op_9058_cast_fp16")];
+            tensor<int32, [4]> var_9065_begin_0 = const()[name = tensor<string, []>("op_9065_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9065_end_0 = const()[name = tensor<string, []>("op_9065_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9065_end_mask_0 = const()[name = tensor<string, []>("op_9065_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9065_cast_fp16 = slice_by_index(begin = var_9065_begin_0, end = var_9065_end_0, end_mask = var_9065_end_mask_0, x = var_8861_cast_fp16)[name = tensor<string, []>("op_9065_cast_fp16")];
+            tensor<int32, [4]> var_9072_begin_0 = const()[name = tensor<string, []>("op_9072_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9072_end_0 = const()[name = tensor<string, []>("op_9072_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9072_end_mask_0 = const()[name = tensor<string, []>("op_9072_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9072_cast_fp16 = slice_by_index(begin = var_9072_begin_0, end = var_9072_end_0, end_mask = var_9072_end_mask_0, x = var_8861_cast_fp16)[name = tensor<string, []>("op_9072_cast_fp16")];
+            tensor<int32, [4]> var_9079_begin_0 = const()[name = tensor<string, []>("op_9079_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9079_end_0 = const()[name = tensor<string, []>("op_9079_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9079_end_mask_0 = const()[name = tensor<string, []>("op_9079_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9079_cast_fp16 = slice_by_index(begin = var_9079_begin_0, end = var_9079_end_0, end_mask = var_9079_end_mask_0, x = var_8861_cast_fp16)[name = tensor<string, []>("op_9079_cast_fp16")];
+            tensor<int32, [4]> var_9086_begin_0 = const()[name = tensor<string, []>("op_9086_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9086_end_0 = const()[name = tensor<string, []>("op_9086_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9086_end_mask_0 = const()[name = tensor<string, []>("op_9086_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9086_cast_fp16 = slice_by_index(begin = var_9086_begin_0, end = var_9086_end_0, end_mask = var_9086_end_mask_0, x = var_8865_cast_fp16)[name = tensor<string, []>("op_9086_cast_fp16")];
+            tensor<int32, [4]> var_9093_begin_0 = const()[name = tensor<string, []>("op_9093_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9093_end_0 = const()[name = tensor<string, []>("op_9093_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9093_end_mask_0 = const()[name = tensor<string, []>("op_9093_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9093_cast_fp16 = slice_by_index(begin = var_9093_begin_0, end = var_9093_end_0, end_mask = var_9093_end_mask_0, x = var_8865_cast_fp16)[name = tensor<string, []>("op_9093_cast_fp16")];
+            tensor<int32, [4]> var_9100_begin_0 = const()[name = tensor<string, []>("op_9100_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9100_end_0 = const()[name = tensor<string, []>("op_9100_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9100_end_mask_0 = const()[name = tensor<string, []>("op_9100_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9100_cast_fp16 = slice_by_index(begin = var_9100_begin_0, end = var_9100_end_0, end_mask = var_9100_end_mask_0, x = var_8865_cast_fp16)[name = tensor<string, []>("op_9100_cast_fp16")];
+            tensor<int32, [4]> var_9107_begin_0 = const()[name = tensor<string, []>("op_9107_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9107_end_0 = const()[name = tensor<string, []>("op_9107_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9107_end_mask_0 = const()[name = tensor<string, []>("op_9107_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9107_cast_fp16 = slice_by_index(begin = var_9107_begin_0, end = var_9107_end_0, end_mask = var_9107_end_mask_0, x = var_8865_cast_fp16)[name = tensor<string, []>("op_9107_cast_fp16")];
+            tensor<int32, [4]> var_9114_begin_0 = const()[name = tensor<string, []>("op_9114_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9114_end_0 = const()[name = tensor<string, []>("op_9114_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9114_end_mask_0 = const()[name = tensor<string, []>("op_9114_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9114_cast_fp16 = slice_by_index(begin = var_9114_begin_0, end = var_9114_end_0, end_mask = var_9114_end_mask_0, x = var_8869_cast_fp16)[name = tensor<string, []>("op_9114_cast_fp16")];
+            tensor<int32, [4]> var_9121_begin_0 = const()[name = tensor<string, []>("op_9121_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9121_end_0 = const()[name = tensor<string, []>("op_9121_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9121_end_mask_0 = const()[name = tensor<string, []>("op_9121_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9121_cast_fp16 = slice_by_index(begin = var_9121_begin_0, end = var_9121_end_0, end_mask = var_9121_end_mask_0, x = var_8869_cast_fp16)[name = tensor<string, []>("op_9121_cast_fp16")];
+            tensor<int32, [4]> var_9128_begin_0 = const()[name = tensor<string, []>("op_9128_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9128_end_0 = const()[name = tensor<string, []>("op_9128_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9128_end_mask_0 = const()[name = tensor<string, []>("op_9128_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9128_cast_fp16 = slice_by_index(begin = var_9128_begin_0, end = var_9128_end_0, end_mask = var_9128_end_mask_0, x = var_8869_cast_fp16)[name = tensor<string, []>("op_9128_cast_fp16")];
+            tensor<int32, [4]> var_9135_begin_0 = const()[name = tensor<string, []>("op_9135_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9135_end_0 = const()[name = tensor<string, []>("op_9135_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9135_end_mask_0 = const()[name = tensor<string, []>("op_9135_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9135_cast_fp16 = slice_by_index(begin = var_9135_begin_0, end = var_9135_end_0, end_mask = var_9135_end_mask_0, x = var_8869_cast_fp16)[name = tensor<string, []>("op_9135_cast_fp16")];
+            tensor<int32, [4]> var_9142_begin_0 = const()[name = tensor<string, []>("op_9142_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9142_end_0 = const()[name = tensor<string, []>("op_9142_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9142_end_mask_0 = const()[name = tensor<string, []>("op_9142_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9142_cast_fp16 = slice_by_index(begin = var_9142_begin_0, end = var_9142_end_0, end_mask = var_9142_end_mask_0, x = var_8873_cast_fp16)[name = tensor<string, []>("op_9142_cast_fp16")];
+            tensor<int32, [4]> var_9149_begin_0 = const()[name = tensor<string, []>("op_9149_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9149_end_0 = const()[name = tensor<string, []>("op_9149_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9149_end_mask_0 = const()[name = tensor<string, []>("op_9149_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9149_cast_fp16 = slice_by_index(begin = var_9149_begin_0, end = var_9149_end_0, end_mask = var_9149_end_mask_0, x = var_8873_cast_fp16)[name = tensor<string, []>("op_9149_cast_fp16")];
+            tensor<int32, [4]> var_9156_begin_0 = const()[name = tensor<string, []>("op_9156_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9156_end_0 = const()[name = tensor<string, []>("op_9156_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9156_end_mask_0 = const()[name = tensor<string, []>("op_9156_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9156_cast_fp16 = slice_by_index(begin = var_9156_begin_0, end = var_9156_end_0, end_mask = var_9156_end_mask_0, x = var_8873_cast_fp16)[name = tensor<string, []>("op_9156_cast_fp16")];
+            tensor<int32, [4]> var_9163_begin_0 = const()[name = tensor<string, []>("op_9163_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9163_end_0 = const()[name = tensor<string, []>("op_9163_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9163_end_mask_0 = const()[name = tensor<string, []>("op_9163_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9163_cast_fp16 = slice_by_index(begin = var_9163_begin_0, end = var_9163_end_0, end_mask = var_9163_end_mask_0, x = var_8873_cast_fp16)[name = tensor<string, []>("op_9163_cast_fp16")];
+            tensor<int32, [4]> var_9170_begin_0 = const()[name = tensor<string, []>("op_9170_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9170_end_0 = const()[name = tensor<string, []>("op_9170_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9170_end_mask_0 = const()[name = tensor<string, []>("op_9170_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9170_cast_fp16 = slice_by_index(begin = var_9170_begin_0, end = var_9170_end_0, end_mask = var_9170_end_mask_0, x = var_8877_cast_fp16)[name = tensor<string, []>("op_9170_cast_fp16")];
+            tensor<int32, [4]> var_9177_begin_0 = const()[name = tensor<string, []>("op_9177_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9177_end_0 = const()[name = tensor<string, []>("op_9177_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9177_end_mask_0 = const()[name = tensor<string, []>("op_9177_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9177_cast_fp16 = slice_by_index(begin = var_9177_begin_0, end = var_9177_end_0, end_mask = var_9177_end_mask_0, x = var_8877_cast_fp16)[name = tensor<string, []>("op_9177_cast_fp16")];
+            tensor<int32, [4]> var_9184_begin_0 = const()[name = tensor<string, []>("op_9184_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9184_end_0 = const()[name = tensor<string, []>("op_9184_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9184_end_mask_0 = const()[name = tensor<string, []>("op_9184_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9184_cast_fp16 = slice_by_index(begin = var_9184_begin_0, end = var_9184_end_0, end_mask = var_9184_end_mask_0, x = var_8877_cast_fp16)[name = tensor<string, []>("op_9184_cast_fp16")];
+            tensor<int32, [4]> var_9191_begin_0 = const()[name = tensor<string, []>("op_9191_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9191_end_0 = const()[name = tensor<string, []>("op_9191_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9191_end_mask_0 = const()[name = tensor<string, []>("op_9191_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9191_cast_fp16 = slice_by_index(begin = var_9191_begin_0, end = var_9191_end_0, end_mask = var_9191_end_mask_0, x = var_8877_cast_fp16)[name = tensor<string, []>("op_9191_cast_fp16")];
+            tensor<int32, [4]> var_9198_begin_0 = const()[name = tensor<string, []>("op_9198_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9198_end_0 = const()[name = tensor<string, []>("op_9198_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9198_end_mask_0 = const()[name = tensor<string, []>("op_9198_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9198_cast_fp16 = slice_by_index(begin = var_9198_begin_0, end = var_9198_end_0, end_mask = var_9198_end_mask_0, x = var_8881_cast_fp16)[name = tensor<string, []>("op_9198_cast_fp16")];
+            tensor<int32, [4]> var_9205_begin_0 = const()[name = tensor<string, []>("op_9205_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9205_end_0 = const()[name = tensor<string, []>("op_9205_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9205_end_mask_0 = const()[name = tensor<string, []>("op_9205_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9205_cast_fp16 = slice_by_index(begin = var_9205_begin_0, end = var_9205_end_0, end_mask = var_9205_end_mask_0, x = var_8881_cast_fp16)[name = tensor<string, []>("op_9205_cast_fp16")];
+            tensor<int32, [4]> var_9212_begin_0 = const()[name = tensor<string, []>("op_9212_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9212_end_0 = const()[name = tensor<string, []>("op_9212_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9212_end_mask_0 = const()[name = tensor<string, []>("op_9212_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9212_cast_fp16 = slice_by_index(begin = var_9212_begin_0, end = var_9212_end_0, end_mask = var_9212_end_mask_0, x = var_8881_cast_fp16)[name = tensor<string, []>("op_9212_cast_fp16")];
+            tensor<int32, [4]> var_9219_begin_0 = const()[name = tensor<string, []>("op_9219_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9219_end_0 = const()[name = tensor<string, []>("op_9219_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9219_end_mask_0 = const()[name = tensor<string, []>("op_9219_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9219_cast_fp16 = slice_by_index(begin = var_9219_begin_0, end = var_9219_end_0, end_mask = var_9219_end_mask_0, x = var_8881_cast_fp16)[name = tensor<string, []>("op_9219_cast_fp16")];
+            tensor<int32, [4]> k_19_perm_0 = const()[name = tensor<string, []>("k_19_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_9224_begin_0 = const()[name = tensor<string, []>("op_9224_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9224_end_0 = const()[name = tensor<string, []>("op_9224_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_9224_end_mask_0 = const()[name = tensor<string, []>("op_9224_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_2 = transpose(perm = k_19_perm_0, x = key_19_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_9224_cast_fp16 = slice_by_index(begin = var_9224_begin_0, end = var_9224_end_0, end_mask = var_9224_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9224_cast_fp16")];
+            tensor<int32, [4]> var_9228_begin_0 = const()[name = tensor<string, []>("op_9228_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_9228_end_0 = const()[name = tensor<string, []>("op_9228_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_9228_end_mask_0 = const()[name = tensor<string, []>("op_9228_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9228_cast_fp16 = slice_by_index(begin = var_9228_begin_0, end = var_9228_end_0, end_mask = var_9228_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9228_cast_fp16")];
+            tensor<int32, [4]> var_9232_begin_0 = const()[name = tensor<string, []>("op_9232_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_9232_end_0 = const()[name = tensor<string, []>("op_9232_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_9232_end_mask_0 = const()[name = tensor<string, []>("op_9232_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9232_cast_fp16 = slice_by_index(begin = var_9232_begin_0, end = var_9232_end_0, end_mask = var_9232_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9232_cast_fp16")];
+            tensor<int32, [4]> var_9236_begin_0 = const()[name = tensor<string, []>("op_9236_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_9236_end_0 = const()[name = tensor<string, []>("op_9236_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_9236_end_mask_0 = const()[name = tensor<string, []>("op_9236_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9236_cast_fp16 = slice_by_index(begin = var_9236_begin_0, end = var_9236_end_0, end_mask = var_9236_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9236_cast_fp16")];
+            tensor<int32, [4]> var_9240_begin_0 = const()[name = tensor<string, []>("op_9240_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_9240_end_0 = const()[name = tensor<string, []>("op_9240_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_9240_end_mask_0 = const()[name = tensor<string, []>("op_9240_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9240_cast_fp16 = slice_by_index(begin = var_9240_begin_0, end = var_9240_end_0, end_mask = var_9240_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9240_cast_fp16")];
+            tensor<int32, [4]> var_9244_begin_0 = const()[name = tensor<string, []>("op_9244_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_9244_end_0 = const()[name = tensor<string, []>("op_9244_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_9244_end_mask_0 = const()[name = tensor<string, []>("op_9244_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9244_cast_fp16 = slice_by_index(begin = var_9244_begin_0, end = var_9244_end_0, end_mask = var_9244_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9244_cast_fp16")];
+            tensor<int32, [4]> var_9248_begin_0 = const()[name = tensor<string, []>("op_9248_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_9248_end_0 = const()[name = tensor<string, []>("op_9248_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_9248_end_mask_0 = const()[name = tensor<string, []>("op_9248_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9248_cast_fp16 = slice_by_index(begin = var_9248_begin_0, end = var_9248_end_0, end_mask = var_9248_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9248_cast_fp16")];
+            tensor<int32, [4]> var_9252_begin_0 = const()[name = tensor<string, []>("op_9252_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_9252_end_0 = const()[name = tensor<string, []>("op_9252_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_9252_end_mask_0 = const()[name = tensor<string, []>("op_9252_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9252_cast_fp16 = slice_by_index(begin = var_9252_begin_0, end = var_9252_end_0, end_mask = var_9252_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9252_cast_fp16")];
+            tensor<int32, [4]> var_9256_begin_0 = const()[name = tensor<string, []>("op_9256_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_9256_end_0 = const()[name = tensor<string, []>("op_9256_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_9256_end_mask_0 = const()[name = tensor<string, []>("op_9256_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9256_cast_fp16 = slice_by_index(begin = var_9256_begin_0, end = var_9256_end_0, end_mask = var_9256_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9256_cast_fp16")];
+            tensor<int32, [4]> var_9260_begin_0 = const()[name = tensor<string, []>("op_9260_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_9260_end_0 = const()[name = tensor<string, []>("op_9260_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_9260_end_mask_0 = const()[name = tensor<string, []>("op_9260_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9260_cast_fp16 = slice_by_index(begin = var_9260_begin_0, end = var_9260_end_0, end_mask = var_9260_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9260_cast_fp16")];
+            tensor<int32, [4]> var_9264_begin_0 = const()[name = tensor<string, []>("op_9264_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_9264_end_0 = const()[name = tensor<string, []>("op_9264_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_9264_end_mask_0 = const()[name = tensor<string, []>("op_9264_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9264_cast_fp16 = slice_by_index(begin = var_9264_begin_0, end = var_9264_end_0, end_mask = var_9264_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9264_cast_fp16")];
+            tensor<int32, [4]> var_9268_begin_0 = const()[name = tensor<string, []>("op_9268_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_9268_end_0 = const()[name = tensor<string, []>("op_9268_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_9268_end_mask_0 = const()[name = tensor<string, []>("op_9268_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_9268_cast_fp16 = slice_by_index(begin = var_9268_begin_0, end = var_9268_end_0, end_mask = var_9268_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_9268_cast_fp16")];
+            tensor<int32, [4]> var_9270_begin_0 = const()[name = tensor<string, []>("op_9270_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9270_end_0 = const()[name = tensor<string, []>("op_9270_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9270_end_mask_0 = const()[name = tensor<string, []>("op_9270_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9270_cast_fp16 = slice_by_index(begin = var_9270_begin_0, end = var_9270_end_0, end_mask = var_9270_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9270_cast_fp16")];
+            tensor<int32, [4]> var_9274_begin_0 = const()[name = tensor<string, []>("op_9274_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_9274_end_0 = const()[name = tensor<string, []>("op_9274_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_9274_end_mask_0 = const()[name = tensor<string, []>("op_9274_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9274_cast_fp16 = slice_by_index(begin = var_9274_begin_0, end = var_9274_end_0, end_mask = var_9274_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9274_cast_fp16")];
+            tensor<int32, [4]> var_9278_begin_0 = const()[name = tensor<string, []>("op_9278_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_9278_end_0 = const()[name = tensor<string, []>("op_9278_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_9278_end_mask_0 = const()[name = tensor<string, []>("op_9278_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9278_cast_fp16 = slice_by_index(begin = var_9278_begin_0, end = var_9278_end_0, end_mask = var_9278_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9278_cast_fp16")];
+            tensor<int32, [4]> var_9282_begin_0 = const()[name = tensor<string, []>("op_9282_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_9282_end_0 = const()[name = tensor<string, []>("op_9282_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_9282_end_mask_0 = const()[name = tensor<string, []>("op_9282_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9282_cast_fp16 = slice_by_index(begin = var_9282_begin_0, end = var_9282_end_0, end_mask = var_9282_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9282_cast_fp16")];
+            tensor<int32, [4]> var_9286_begin_0 = const()[name = tensor<string, []>("op_9286_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_9286_end_0 = const()[name = tensor<string, []>("op_9286_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_9286_end_mask_0 = const()[name = tensor<string, []>("op_9286_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9286_cast_fp16 = slice_by_index(begin = var_9286_begin_0, end = var_9286_end_0, end_mask = var_9286_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9286_cast_fp16")];
+            tensor<int32, [4]> var_9290_begin_0 = const()[name = tensor<string, []>("op_9290_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_9290_end_0 = const()[name = tensor<string, []>("op_9290_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_9290_end_mask_0 = const()[name = tensor<string, []>("op_9290_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9290_cast_fp16 = slice_by_index(begin = var_9290_begin_0, end = var_9290_end_0, end_mask = var_9290_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9290_cast_fp16")];
+            tensor<int32, [4]> var_9294_begin_0 = const()[name = tensor<string, []>("op_9294_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_9294_end_0 = const()[name = tensor<string, []>("op_9294_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_9294_end_mask_0 = const()[name = tensor<string, []>("op_9294_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9294_cast_fp16 = slice_by_index(begin = var_9294_begin_0, end = var_9294_end_0, end_mask = var_9294_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9294_cast_fp16")];
+            tensor<int32, [4]> var_9298_begin_0 = const()[name = tensor<string, []>("op_9298_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_9298_end_0 = const()[name = tensor<string, []>("op_9298_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_9298_end_mask_0 = const()[name = tensor<string, []>("op_9298_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9298_cast_fp16 = slice_by_index(begin = var_9298_begin_0, end = var_9298_end_0, end_mask = var_9298_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9298_cast_fp16")];
+            tensor<int32, [4]> var_9302_begin_0 = const()[name = tensor<string, []>("op_9302_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_9302_end_0 = const()[name = tensor<string, []>("op_9302_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_9302_end_mask_0 = const()[name = tensor<string, []>("op_9302_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9302_cast_fp16 = slice_by_index(begin = var_9302_begin_0, end = var_9302_end_0, end_mask = var_9302_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9302_cast_fp16")];
+            tensor<int32, [4]> var_9306_begin_0 = const()[name = tensor<string, []>("op_9306_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_9306_end_0 = const()[name = tensor<string, []>("op_9306_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_9306_end_mask_0 = const()[name = tensor<string, []>("op_9306_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9306_cast_fp16 = slice_by_index(begin = var_9306_begin_0, end = var_9306_end_0, end_mask = var_9306_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9306_cast_fp16")];
+            tensor<int32, [4]> var_9310_begin_0 = const()[name = tensor<string, []>("op_9310_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_9310_end_0 = const()[name = tensor<string, []>("op_9310_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_9310_end_mask_0 = const()[name = tensor<string, []>("op_9310_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9310_cast_fp16 = slice_by_index(begin = var_9310_begin_0, end = var_9310_end_0, end_mask = var_9310_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9310_cast_fp16")];
+            tensor<int32, [4]> var_9314_begin_0 = const()[name = tensor<string, []>("op_9314_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_9314_end_0 = const()[name = tensor<string, []>("op_9314_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_9314_end_mask_0 = const()[name = tensor<string, []>("op_9314_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9314_cast_fp16 = slice_by_index(begin = var_9314_begin_0, end = var_9314_end_0, end_mask = var_9314_end_mask_0, x = value_19_cast_fp16)[name = tensor<string, []>("op_9314_cast_fp16")];
+            tensor<string, []> var_9318_equation_0 = const()[name = tensor<string, []>("op_9318_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9318_cast_fp16 = einsum(equation = var_9318_equation_0, values = (var_9224_cast_fp16, var_8890_cast_fp16))[name = tensor<string, []>("op_9318_cast_fp16")];
+            tensor<fp16, []> var_9319_to_fp16 = const()[name = tensor<string, []>("op_9319_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_865_cast_fp16 = mul(x = var_9318_cast_fp16, y = var_9319_to_fp16)[name = tensor<string, []>("aw_chunk_865_cast_fp16")];
+            tensor<string, []> var_9322_equation_0 = const()[name = tensor<string, []>("op_9322_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9322_cast_fp16 = einsum(equation = var_9322_equation_0, values = (var_9224_cast_fp16, var_8897_cast_fp16))[name = tensor<string, []>("op_9322_cast_fp16")];
+            tensor<fp16, []> var_9323_to_fp16 = const()[name = tensor<string, []>("op_9323_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_867_cast_fp16 = mul(x = var_9322_cast_fp16, y = var_9323_to_fp16)[name = tensor<string, []>("aw_chunk_867_cast_fp16")];
+            tensor<string, []> var_9326_equation_0 = const()[name = tensor<string, []>("op_9326_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9326_cast_fp16 = einsum(equation = var_9326_equation_0, values = (var_9224_cast_fp16, var_8904_cast_fp16))[name = tensor<string, []>("op_9326_cast_fp16")];
+            tensor<fp16, []> var_9327_to_fp16 = const()[name = tensor<string, []>("op_9327_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_869_cast_fp16 = mul(x = var_9326_cast_fp16, y = var_9327_to_fp16)[name = tensor<string, []>("aw_chunk_869_cast_fp16")];
+            tensor<string, []> var_9330_equation_0 = const()[name = tensor<string, []>("op_9330_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9330_cast_fp16 = einsum(equation = var_9330_equation_0, values = (var_9224_cast_fp16, var_8911_cast_fp16))[name = tensor<string, []>("op_9330_cast_fp16")];
+            tensor<fp16, []> var_9331_to_fp16 = const()[name = tensor<string, []>("op_9331_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_871_cast_fp16 = mul(x = var_9330_cast_fp16, y = var_9331_to_fp16)[name = tensor<string, []>("aw_chunk_871_cast_fp16")];
+            tensor<string, []> var_9334_equation_0 = const()[name = tensor<string, []>("op_9334_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9334_cast_fp16 = einsum(equation = var_9334_equation_0, values = (var_9228_cast_fp16, var_8918_cast_fp16))[name = tensor<string, []>("op_9334_cast_fp16")];
+            tensor<fp16, []> var_9335_to_fp16 = const()[name = tensor<string, []>("op_9335_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_873_cast_fp16 = mul(x = var_9334_cast_fp16, y = var_9335_to_fp16)[name = tensor<string, []>("aw_chunk_873_cast_fp16")];
+            tensor<string, []> var_9338_equation_0 = const()[name = tensor<string, []>("op_9338_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9338_cast_fp16 = einsum(equation = var_9338_equation_0, values = (var_9228_cast_fp16, var_8925_cast_fp16))[name = tensor<string, []>("op_9338_cast_fp16")];
+            tensor<fp16, []> var_9339_to_fp16 = const()[name = tensor<string, []>("op_9339_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_875_cast_fp16 = mul(x = var_9338_cast_fp16, y = var_9339_to_fp16)[name = tensor<string, []>("aw_chunk_875_cast_fp16")];
+            tensor<string, []> var_9342_equation_0 = const()[name = tensor<string, []>("op_9342_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9342_cast_fp16 = einsum(equation = var_9342_equation_0, values = (var_9228_cast_fp16, var_8932_cast_fp16))[name = tensor<string, []>("op_9342_cast_fp16")];
+            tensor<fp16, []> var_9343_to_fp16 = const()[name = tensor<string, []>("op_9343_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_877_cast_fp16 = mul(x = var_9342_cast_fp16, y = var_9343_to_fp16)[name = tensor<string, []>("aw_chunk_877_cast_fp16")];
+            tensor<string, []> var_9346_equation_0 = const()[name = tensor<string, []>("op_9346_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9346_cast_fp16 = einsum(equation = var_9346_equation_0, values = (var_9228_cast_fp16, var_8939_cast_fp16))[name = tensor<string, []>("op_9346_cast_fp16")];
+            tensor<fp16, []> var_9347_to_fp16 = const()[name = tensor<string, []>("op_9347_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_879_cast_fp16 = mul(x = var_9346_cast_fp16, y = var_9347_to_fp16)[name = tensor<string, []>("aw_chunk_879_cast_fp16")];
+            tensor<string, []> var_9350_equation_0 = const()[name = tensor<string, []>("op_9350_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9350_cast_fp16 = einsum(equation = var_9350_equation_0, values = (var_9232_cast_fp16, var_8946_cast_fp16))[name = tensor<string, []>("op_9350_cast_fp16")];
+            tensor<fp16, []> var_9351_to_fp16 = const()[name = tensor<string, []>("op_9351_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_881_cast_fp16 = mul(x = var_9350_cast_fp16, y = var_9351_to_fp16)[name = tensor<string, []>("aw_chunk_881_cast_fp16")];
+            tensor<string, []> var_9354_equation_0 = const()[name = tensor<string, []>("op_9354_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9354_cast_fp16 = einsum(equation = var_9354_equation_0, values = (var_9232_cast_fp16, var_8953_cast_fp16))[name = tensor<string, []>("op_9354_cast_fp16")];
+            tensor<fp16, []> var_9355_to_fp16 = const()[name = tensor<string, []>("op_9355_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_883_cast_fp16 = mul(x = var_9354_cast_fp16, y = var_9355_to_fp16)[name = tensor<string, []>("aw_chunk_883_cast_fp16")];
+            tensor<string, []> var_9358_equation_0 = const()[name = tensor<string, []>("op_9358_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9358_cast_fp16 = einsum(equation = var_9358_equation_0, values = (var_9232_cast_fp16, var_8960_cast_fp16))[name = tensor<string, []>("op_9358_cast_fp16")];
+            tensor<fp16, []> var_9359_to_fp16 = const()[name = tensor<string, []>("op_9359_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_885_cast_fp16 = mul(x = var_9358_cast_fp16, y = var_9359_to_fp16)[name = tensor<string, []>("aw_chunk_885_cast_fp16")];
+            tensor<string, []> var_9362_equation_0 = const()[name = tensor<string, []>("op_9362_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9362_cast_fp16 = einsum(equation = var_9362_equation_0, values = (var_9232_cast_fp16, var_8967_cast_fp16))[name = tensor<string, []>("op_9362_cast_fp16")];
+            tensor<fp16, []> var_9363_to_fp16 = const()[name = tensor<string, []>("op_9363_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_887_cast_fp16 = mul(x = var_9362_cast_fp16, y = var_9363_to_fp16)[name = tensor<string, []>("aw_chunk_887_cast_fp16")];
+            tensor<string, []> var_9366_equation_0 = const()[name = tensor<string, []>("op_9366_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9366_cast_fp16 = einsum(equation = var_9366_equation_0, values = (var_9236_cast_fp16, var_8974_cast_fp16))[name = tensor<string, []>("op_9366_cast_fp16")];
+            tensor<fp16, []> var_9367_to_fp16 = const()[name = tensor<string, []>("op_9367_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_889_cast_fp16 = mul(x = var_9366_cast_fp16, y = var_9367_to_fp16)[name = tensor<string, []>("aw_chunk_889_cast_fp16")];
+            tensor<string, []> var_9370_equation_0 = const()[name = tensor<string, []>("op_9370_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9370_cast_fp16 = einsum(equation = var_9370_equation_0, values = (var_9236_cast_fp16, var_8981_cast_fp16))[name = tensor<string, []>("op_9370_cast_fp16")];
+            tensor<fp16, []> var_9371_to_fp16 = const()[name = tensor<string, []>("op_9371_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_891_cast_fp16 = mul(x = var_9370_cast_fp16, y = var_9371_to_fp16)[name = tensor<string, []>("aw_chunk_891_cast_fp16")];
+            tensor<string, []> var_9374_equation_0 = const()[name = tensor<string, []>("op_9374_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9374_cast_fp16 = einsum(equation = var_9374_equation_0, values = (var_9236_cast_fp16, var_8988_cast_fp16))[name = tensor<string, []>("op_9374_cast_fp16")];
+            tensor<fp16, []> var_9375_to_fp16 = const()[name = tensor<string, []>("op_9375_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_893_cast_fp16 = mul(x = var_9374_cast_fp16, y = var_9375_to_fp16)[name = tensor<string, []>("aw_chunk_893_cast_fp16")];
+            tensor<string, []> var_9378_equation_0 = const()[name = tensor<string, []>("op_9378_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9378_cast_fp16 = einsum(equation = var_9378_equation_0, values = (var_9236_cast_fp16, var_8995_cast_fp16))[name = tensor<string, []>("op_9378_cast_fp16")];
+            tensor<fp16, []> var_9379_to_fp16 = const()[name = tensor<string, []>("op_9379_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_895_cast_fp16 = mul(x = var_9378_cast_fp16, y = var_9379_to_fp16)[name = tensor<string, []>("aw_chunk_895_cast_fp16")];
+            tensor<string, []> var_9382_equation_0 = const()[name = tensor<string, []>("op_9382_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9382_cast_fp16 = einsum(equation = var_9382_equation_0, values = (var_9240_cast_fp16, var_9002_cast_fp16))[name = tensor<string, []>("op_9382_cast_fp16")];
+            tensor<fp16, []> var_9383_to_fp16 = const()[name = tensor<string, []>("op_9383_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_897_cast_fp16 = mul(x = var_9382_cast_fp16, y = var_9383_to_fp16)[name = tensor<string, []>("aw_chunk_897_cast_fp16")];
+            tensor<string, []> var_9386_equation_0 = const()[name = tensor<string, []>("op_9386_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9386_cast_fp16 = einsum(equation = var_9386_equation_0, values = (var_9240_cast_fp16, var_9009_cast_fp16))[name = tensor<string, []>("op_9386_cast_fp16")];
+            tensor<fp16, []> var_9387_to_fp16 = const()[name = tensor<string, []>("op_9387_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_899_cast_fp16 = mul(x = var_9386_cast_fp16, y = var_9387_to_fp16)[name = tensor<string, []>("aw_chunk_899_cast_fp16")];
+            tensor<string, []> var_9390_equation_0 = const()[name = tensor<string, []>("op_9390_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9390_cast_fp16 = einsum(equation = var_9390_equation_0, values = (var_9240_cast_fp16, var_9016_cast_fp16))[name = tensor<string, []>("op_9390_cast_fp16")];
+            tensor<fp16, []> var_9391_to_fp16 = const()[name = tensor<string, []>("op_9391_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_901_cast_fp16 = mul(x = var_9390_cast_fp16, y = var_9391_to_fp16)[name = tensor<string, []>("aw_chunk_901_cast_fp16")];
+            tensor<string, []> var_9394_equation_0 = const()[name = tensor<string, []>("op_9394_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9394_cast_fp16 = einsum(equation = var_9394_equation_0, values = (var_9240_cast_fp16, var_9023_cast_fp16))[name = tensor<string, []>("op_9394_cast_fp16")];
+            tensor<fp16, []> var_9395_to_fp16 = const()[name = tensor<string, []>("op_9395_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_903_cast_fp16 = mul(x = var_9394_cast_fp16, y = var_9395_to_fp16)[name = tensor<string, []>("aw_chunk_903_cast_fp16")];
+            tensor<string, []> var_9398_equation_0 = const()[name = tensor<string, []>("op_9398_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9398_cast_fp16 = einsum(equation = var_9398_equation_0, values = (var_9244_cast_fp16, var_9030_cast_fp16))[name = tensor<string, []>("op_9398_cast_fp16")];
+            tensor<fp16, []> var_9399_to_fp16 = const()[name = tensor<string, []>("op_9399_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_905_cast_fp16 = mul(x = var_9398_cast_fp16, y = var_9399_to_fp16)[name = tensor<string, []>("aw_chunk_905_cast_fp16")];
+            tensor<string, []> var_9402_equation_0 = const()[name = tensor<string, []>("op_9402_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9402_cast_fp16 = einsum(equation = var_9402_equation_0, values = (var_9244_cast_fp16, var_9037_cast_fp16))[name = tensor<string, []>("op_9402_cast_fp16")];
+            tensor<fp16, []> var_9403_to_fp16 = const()[name = tensor<string, []>("op_9403_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_907_cast_fp16 = mul(x = var_9402_cast_fp16, y = var_9403_to_fp16)[name = tensor<string, []>("aw_chunk_907_cast_fp16")];
+            tensor<string, []> var_9406_equation_0 = const()[name = tensor<string, []>("op_9406_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9406_cast_fp16 = einsum(equation = var_9406_equation_0, values = (var_9244_cast_fp16, var_9044_cast_fp16))[name = tensor<string, []>("op_9406_cast_fp16")];
+            tensor<fp16, []> var_9407_to_fp16 = const()[name = tensor<string, []>("op_9407_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_909_cast_fp16 = mul(x = var_9406_cast_fp16, y = var_9407_to_fp16)[name = tensor<string, []>("aw_chunk_909_cast_fp16")];
+            tensor<string, []> var_9410_equation_0 = const()[name = tensor<string, []>("op_9410_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9410_cast_fp16 = einsum(equation = var_9410_equation_0, values = (var_9244_cast_fp16, var_9051_cast_fp16))[name = tensor<string, []>("op_9410_cast_fp16")];
+            tensor<fp16, []> var_9411_to_fp16 = const()[name = tensor<string, []>("op_9411_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_911_cast_fp16 = mul(x = var_9410_cast_fp16, y = var_9411_to_fp16)[name = tensor<string, []>("aw_chunk_911_cast_fp16")];
+            tensor<string, []> var_9414_equation_0 = const()[name = tensor<string, []>("op_9414_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9414_cast_fp16 = einsum(equation = var_9414_equation_0, values = (var_9248_cast_fp16, var_9058_cast_fp16))[name = tensor<string, []>("op_9414_cast_fp16")];
+            tensor<fp16, []> var_9415_to_fp16 = const()[name = tensor<string, []>("op_9415_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_913_cast_fp16 = mul(x = var_9414_cast_fp16, y = var_9415_to_fp16)[name = tensor<string, []>("aw_chunk_913_cast_fp16")];
+            tensor<string, []> var_9418_equation_0 = const()[name = tensor<string, []>("op_9418_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9418_cast_fp16 = einsum(equation = var_9418_equation_0, values = (var_9248_cast_fp16, var_9065_cast_fp16))[name = tensor<string, []>("op_9418_cast_fp16")];
+            tensor<fp16, []> var_9419_to_fp16 = const()[name = tensor<string, []>("op_9419_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_915_cast_fp16 = mul(x = var_9418_cast_fp16, y = var_9419_to_fp16)[name = tensor<string, []>("aw_chunk_915_cast_fp16")];
+            tensor<string, []> var_9422_equation_0 = const()[name = tensor<string, []>("op_9422_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9422_cast_fp16 = einsum(equation = var_9422_equation_0, values = (var_9248_cast_fp16, var_9072_cast_fp16))[name = tensor<string, []>("op_9422_cast_fp16")];
+            tensor<fp16, []> var_9423_to_fp16 = const()[name = tensor<string, []>("op_9423_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_917_cast_fp16 = mul(x = var_9422_cast_fp16, y = var_9423_to_fp16)[name = tensor<string, []>("aw_chunk_917_cast_fp16")];
+            tensor<string, []> var_9426_equation_0 = const()[name = tensor<string, []>("op_9426_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9426_cast_fp16 = einsum(equation = var_9426_equation_0, values = (var_9248_cast_fp16, var_9079_cast_fp16))[name = tensor<string, []>("op_9426_cast_fp16")];
+            tensor<fp16, []> var_9427_to_fp16 = const()[name = tensor<string, []>("op_9427_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_919_cast_fp16 = mul(x = var_9426_cast_fp16, y = var_9427_to_fp16)[name = tensor<string, []>("aw_chunk_919_cast_fp16")];
+            tensor<string, []> var_9430_equation_0 = const()[name = tensor<string, []>("op_9430_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9430_cast_fp16 = einsum(equation = var_9430_equation_0, values = (var_9252_cast_fp16, var_9086_cast_fp16))[name = tensor<string, []>("op_9430_cast_fp16")];
+            tensor<fp16, []> var_9431_to_fp16 = const()[name = tensor<string, []>("op_9431_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_921_cast_fp16 = mul(x = var_9430_cast_fp16, y = var_9431_to_fp16)[name = tensor<string, []>("aw_chunk_921_cast_fp16")];
+            tensor<string, []> var_9434_equation_0 = const()[name = tensor<string, []>("op_9434_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9434_cast_fp16 = einsum(equation = var_9434_equation_0, values = (var_9252_cast_fp16, var_9093_cast_fp16))[name = tensor<string, []>("op_9434_cast_fp16")];
+            tensor<fp16, []> var_9435_to_fp16 = const()[name = tensor<string, []>("op_9435_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_923_cast_fp16 = mul(x = var_9434_cast_fp16, y = var_9435_to_fp16)[name = tensor<string, []>("aw_chunk_923_cast_fp16")];
+            tensor<string, []> var_9438_equation_0 = const()[name = tensor<string, []>("op_9438_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9438_cast_fp16 = einsum(equation = var_9438_equation_0, values = (var_9252_cast_fp16, var_9100_cast_fp16))[name = tensor<string, []>("op_9438_cast_fp16")];
+            tensor<fp16, []> var_9439_to_fp16 = const()[name = tensor<string, []>("op_9439_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_925_cast_fp16 = mul(x = var_9438_cast_fp16, y = var_9439_to_fp16)[name = tensor<string, []>("aw_chunk_925_cast_fp16")];
+            tensor<string, []> var_9442_equation_0 = const()[name = tensor<string, []>("op_9442_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9442_cast_fp16 = einsum(equation = var_9442_equation_0, values = (var_9252_cast_fp16, var_9107_cast_fp16))[name = tensor<string, []>("op_9442_cast_fp16")];
+            tensor<fp16, []> var_9443_to_fp16 = const()[name = tensor<string, []>("op_9443_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_927_cast_fp16 = mul(x = var_9442_cast_fp16, y = var_9443_to_fp16)[name = tensor<string, []>("aw_chunk_927_cast_fp16")];
+            tensor<string, []> var_9446_equation_0 = const()[name = tensor<string, []>("op_9446_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9446_cast_fp16 = einsum(equation = var_9446_equation_0, values = (var_9256_cast_fp16, var_9114_cast_fp16))[name = tensor<string, []>("op_9446_cast_fp16")];
+            tensor<fp16, []> var_9447_to_fp16 = const()[name = tensor<string, []>("op_9447_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_929_cast_fp16 = mul(x = var_9446_cast_fp16, y = var_9447_to_fp16)[name = tensor<string, []>("aw_chunk_929_cast_fp16")];
+            tensor<string, []> var_9450_equation_0 = const()[name = tensor<string, []>("op_9450_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9450_cast_fp16 = einsum(equation = var_9450_equation_0, values = (var_9256_cast_fp16, var_9121_cast_fp16))[name = tensor<string, []>("op_9450_cast_fp16")];
+            tensor<fp16, []> var_9451_to_fp16 = const()[name = tensor<string, []>("op_9451_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_931_cast_fp16 = mul(x = var_9450_cast_fp16, y = var_9451_to_fp16)[name = tensor<string, []>("aw_chunk_931_cast_fp16")];
+            tensor<string, []> var_9454_equation_0 = const()[name = tensor<string, []>("op_9454_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9454_cast_fp16 = einsum(equation = var_9454_equation_0, values = (var_9256_cast_fp16, var_9128_cast_fp16))[name = tensor<string, []>("op_9454_cast_fp16")];
+            tensor<fp16, []> var_9455_to_fp16 = const()[name = tensor<string, []>("op_9455_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_933_cast_fp16 = mul(x = var_9454_cast_fp16, y = var_9455_to_fp16)[name = tensor<string, []>("aw_chunk_933_cast_fp16")];
+            tensor<string, []> var_9458_equation_0 = const()[name = tensor<string, []>("op_9458_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9458_cast_fp16 = einsum(equation = var_9458_equation_0, values = (var_9256_cast_fp16, var_9135_cast_fp16))[name = tensor<string, []>("op_9458_cast_fp16")];
+            tensor<fp16, []> var_9459_to_fp16 = const()[name = tensor<string, []>("op_9459_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_935_cast_fp16 = mul(x = var_9458_cast_fp16, y = var_9459_to_fp16)[name = tensor<string, []>("aw_chunk_935_cast_fp16")];
+            tensor<string, []> var_9462_equation_0 = const()[name = tensor<string, []>("op_9462_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9462_cast_fp16 = einsum(equation = var_9462_equation_0, values = (var_9260_cast_fp16, var_9142_cast_fp16))[name = tensor<string, []>("op_9462_cast_fp16")];
+            tensor<fp16, []> var_9463_to_fp16 = const()[name = tensor<string, []>("op_9463_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_937_cast_fp16 = mul(x = var_9462_cast_fp16, y = var_9463_to_fp16)[name = tensor<string, []>("aw_chunk_937_cast_fp16")];
+            tensor<string, []> var_9466_equation_0 = const()[name = tensor<string, []>("op_9466_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9466_cast_fp16 = einsum(equation = var_9466_equation_0, values = (var_9260_cast_fp16, var_9149_cast_fp16))[name = tensor<string, []>("op_9466_cast_fp16")];
+            tensor<fp16, []> var_9467_to_fp16 = const()[name = tensor<string, []>("op_9467_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_939_cast_fp16 = mul(x = var_9466_cast_fp16, y = var_9467_to_fp16)[name = tensor<string, []>("aw_chunk_939_cast_fp16")];
+            tensor<string, []> var_9470_equation_0 = const()[name = tensor<string, []>("op_9470_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9470_cast_fp16 = einsum(equation = var_9470_equation_0, values = (var_9260_cast_fp16, var_9156_cast_fp16))[name = tensor<string, []>("op_9470_cast_fp16")];
+            tensor<fp16, []> var_9471_to_fp16 = const()[name = tensor<string, []>("op_9471_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_941_cast_fp16 = mul(x = var_9470_cast_fp16, y = var_9471_to_fp16)[name = tensor<string, []>("aw_chunk_941_cast_fp16")];
+            tensor<string, []> var_9474_equation_0 = const()[name = tensor<string, []>("op_9474_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9474_cast_fp16 = einsum(equation = var_9474_equation_0, values = (var_9260_cast_fp16, var_9163_cast_fp16))[name = tensor<string, []>("op_9474_cast_fp16")];
+            tensor<fp16, []> var_9475_to_fp16 = const()[name = tensor<string, []>("op_9475_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_943_cast_fp16 = mul(x = var_9474_cast_fp16, y = var_9475_to_fp16)[name = tensor<string, []>("aw_chunk_943_cast_fp16")];
+            tensor<string, []> var_9478_equation_0 = const()[name = tensor<string, []>("op_9478_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9478_cast_fp16 = einsum(equation = var_9478_equation_0, values = (var_9264_cast_fp16, var_9170_cast_fp16))[name = tensor<string, []>("op_9478_cast_fp16")];
+            tensor<fp16, []> var_9479_to_fp16 = const()[name = tensor<string, []>("op_9479_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_945_cast_fp16 = mul(x = var_9478_cast_fp16, y = var_9479_to_fp16)[name = tensor<string, []>("aw_chunk_945_cast_fp16")];
+            tensor<string, []> var_9482_equation_0 = const()[name = tensor<string, []>("op_9482_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9482_cast_fp16 = einsum(equation = var_9482_equation_0, values = (var_9264_cast_fp16, var_9177_cast_fp16))[name = tensor<string, []>("op_9482_cast_fp16")];
+            tensor<fp16, []> var_9483_to_fp16 = const()[name = tensor<string, []>("op_9483_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_947_cast_fp16 = mul(x = var_9482_cast_fp16, y = var_9483_to_fp16)[name = tensor<string, []>("aw_chunk_947_cast_fp16")];
+            tensor<string, []> var_9486_equation_0 = const()[name = tensor<string, []>("op_9486_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9486_cast_fp16 = einsum(equation = var_9486_equation_0, values = (var_9264_cast_fp16, var_9184_cast_fp16))[name = tensor<string, []>("op_9486_cast_fp16")];
+            tensor<fp16, []> var_9487_to_fp16 = const()[name = tensor<string, []>("op_9487_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_949_cast_fp16 = mul(x = var_9486_cast_fp16, y = var_9487_to_fp16)[name = tensor<string, []>("aw_chunk_949_cast_fp16")];
+            tensor<string, []> var_9490_equation_0 = const()[name = tensor<string, []>("op_9490_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9490_cast_fp16 = einsum(equation = var_9490_equation_0, values = (var_9264_cast_fp16, var_9191_cast_fp16))[name = tensor<string, []>("op_9490_cast_fp16")];
+            tensor<fp16, []> var_9491_to_fp16 = const()[name = tensor<string, []>("op_9491_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_951_cast_fp16 = mul(x = var_9490_cast_fp16, y = var_9491_to_fp16)[name = tensor<string, []>("aw_chunk_951_cast_fp16")];
+            tensor<string, []> var_9494_equation_0 = const()[name = tensor<string, []>("op_9494_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9494_cast_fp16 = einsum(equation = var_9494_equation_0, values = (var_9268_cast_fp16, var_9198_cast_fp16))[name = tensor<string, []>("op_9494_cast_fp16")];
+            tensor<fp16, []> var_9495_to_fp16 = const()[name = tensor<string, []>("op_9495_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_953_cast_fp16 = mul(x = var_9494_cast_fp16, y = var_9495_to_fp16)[name = tensor<string, []>("aw_chunk_953_cast_fp16")];
+            tensor<string, []> var_9498_equation_0 = const()[name = tensor<string, []>("op_9498_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9498_cast_fp16 = einsum(equation = var_9498_equation_0, values = (var_9268_cast_fp16, var_9205_cast_fp16))[name = tensor<string, []>("op_9498_cast_fp16")];
+            tensor<fp16, []> var_9499_to_fp16 = const()[name = tensor<string, []>("op_9499_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_955_cast_fp16 = mul(x = var_9498_cast_fp16, y = var_9499_to_fp16)[name = tensor<string, []>("aw_chunk_955_cast_fp16")];
+            tensor<string, []> var_9502_equation_0 = const()[name = tensor<string, []>("op_9502_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9502_cast_fp16 = einsum(equation = var_9502_equation_0, values = (var_9268_cast_fp16, var_9212_cast_fp16))[name = tensor<string, []>("op_9502_cast_fp16")];
+            tensor<fp16, []> var_9503_to_fp16 = const()[name = tensor<string, []>("op_9503_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_957_cast_fp16 = mul(x = var_9502_cast_fp16, y = var_9503_to_fp16)[name = tensor<string, []>("aw_chunk_957_cast_fp16")];
+            tensor<string, []> var_9506_equation_0 = const()[name = tensor<string, []>("op_9506_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9506_cast_fp16 = einsum(equation = var_9506_equation_0, values = (var_9268_cast_fp16, var_9219_cast_fp16))[name = tensor<string, []>("op_9506_cast_fp16")];
+            tensor<fp16, []> var_9507_to_fp16 = const()[name = tensor<string, []>("op_9507_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_959_cast_fp16 = mul(x = var_9506_cast_fp16, y = var_9507_to_fp16)[name = tensor<string, []>("aw_chunk_959_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9509_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_865_cast_fp16)[name = tensor<string, []>("op_9509_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9510_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_867_cast_fp16)[name = tensor<string, []>("op_9510_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9511_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_869_cast_fp16)[name = tensor<string, []>("op_9511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9512_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_871_cast_fp16)[name = tensor<string, []>("op_9512_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9513_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_873_cast_fp16)[name = tensor<string, []>("op_9513_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9514_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_875_cast_fp16)[name = tensor<string, []>("op_9514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9515_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_877_cast_fp16)[name = tensor<string, []>("op_9515_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9516_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_879_cast_fp16)[name = tensor<string, []>("op_9516_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9517_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_881_cast_fp16)[name = tensor<string, []>("op_9517_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9518_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_883_cast_fp16)[name = tensor<string, []>("op_9518_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9519_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_885_cast_fp16)[name = tensor<string, []>("op_9519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9520_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_887_cast_fp16)[name = tensor<string, []>("op_9520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9521_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_889_cast_fp16)[name = tensor<string, []>("op_9521_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9522_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_891_cast_fp16)[name = tensor<string, []>("op_9522_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9523_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_893_cast_fp16)[name = tensor<string, []>("op_9523_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9524_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_895_cast_fp16)[name = tensor<string, []>("op_9524_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9525_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_897_cast_fp16)[name = tensor<string, []>("op_9525_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9526_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_899_cast_fp16)[name = tensor<string, []>("op_9526_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9527_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_901_cast_fp16)[name = tensor<string, []>("op_9527_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9528_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_903_cast_fp16)[name = tensor<string, []>("op_9528_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9529_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_905_cast_fp16)[name = tensor<string, []>("op_9529_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9530_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_907_cast_fp16)[name = tensor<string, []>("op_9530_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9531_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_909_cast_fp16)[name = tensor<string, []>("op_9531_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9532_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_911_cast_fp16)[name = tensor<string, []>("op_9532_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9533_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_913_cast_fp16)[name = tensor<string, []>("op_9533_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9534_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_915_cast_fp16)[name = tensor<string, []>("op_9534_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9535_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_917_cast_fp16)[name = tensor<string, []>("op_9535_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9536_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_919_cast_fp16)[name = tensor<string, []>("op_9536_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9537_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_921_cast_fp16)[name = tensor<string, []>("op_9537_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9538_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_923_cast_fp16)[name = tensor<string, []>("op_9538_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9539_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_925_cast_fp16)[name = tensor<string, []>("op_9539_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9540_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_927_cast_fp16)[name = tensor<string, []>("op_9540_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9541_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_929_cast_fp16)[name = tensor<string, []>("op_9541_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9542_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_931_cast_fp16)[name = tensor<string, []>("op_9542_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9543_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_933_cast_fp16)[name = tensor<string, []>("op_9543_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9544_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_935_cast_fp16)[name = tensor<string, []>("op_9544_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9545_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_937_cast_fp16)[name = tensor<string, []>("op_9545_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9546_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_939_cast_fp16)[name = tensor<string, []>("op_9546_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9547_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_941_cast_fp16)[name = tensor<string, []>("op_9547_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9548_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_943_cast_fp16)[name = tensor<string, []>("op_9548_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9549_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_945_cast_fp16)[name = tensor<string, []>("op_9549_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9550_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_947_cast_fp16)[name = tensor<string, []>("op_9550_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9551_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_949_cast_fp16)[name = tensor<string, []>("op_9551_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9552_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_951_cast_fp16)[name = tensor<string, []>("op_9552_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9553_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_953_cast_fp16)[name = tensor<string, []>("op_9553_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9554_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_955_cast_fp16)[name = tensor<string, []>("op_9554_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9555_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_957_cast_fp16)[name = tensor<string, []>("op_9555_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_9556_cast_fp16 = softmax(axis = var_8782, x = aw_chunk_959_cast_fp16)[name = tensor<string, []>("op_9556_cast_fp16")];
+            tensor<string, []> var_9558_equation_0 = const()[name = tensor<string, []>("op_9558_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9558_cast_fp16 = einsum(equation = var_9558_equation_0, values = (var_9270_cast_fp16, var_9509_cast_fp16))[name = tensor<string, []>("op_9558_cast_fp16")];
+            tensor<string, []> var_9560_equation_0 = const()[name = tensor<string, []>("op_9560_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9560_cast_fp16 = einsum(equation = var_9560_equation_0, values = (var_9270_cast_fp16, var_9510_cast_fp16))[name = tensor<string, []>("op_9560_cast_fp16")];
+            tensor<string, []> var_9562_equation_0 = const()[name = tensor<string, []>("op_9562_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9562_cast_fp16 = einsum(equation = var_9562_equation_0, values = (var_9270_cast_fp16, var_9511_cast_fp16))[name = tensor<string, []>("op_9562_cast_fp16")];
+            tensor<string, []> var_9564_equation_0 = const()[name = tensor<string, []>("op_9564_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9564_cast_fp16 = einsum(equation = var_9564_equation_0, values = (var_9270_cast_fp16, var_9512_cast_fp16))[name = tensor<string, []>("op_9564_cast_fp16")];
+            tensor<string, []> var_9566_equation_0 = const()[name = tensor<string, []>("op_9566_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9566_cast_fp16 = einsum(equation = var_9566_equation_0, values = (var_9274_cast_fp16, var_9513_cast_fp16))[name = tensor<string, []>("op_9566_cast_fp16")];
+            tensor<string, []> var_9568_equation_0 = const()[name = tensor<string, []>("op_9568_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9568_cast_fp16 = einsum(equation = var_9568_equation_0, values = (var_9274_cast_fp16, var_9514_cast_fp16))[name = tensor<string, []>("op_9568_cast_fp16")];
+            tensor<string, []> var_9570_equation_0 = const()[name = tensor<string, []>("op_9570_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9570_cast_fp16 = einsum(equation = var_9570_equation_0, values = (var_9274_cast_fp16, var_9515_cast_fp16))[name = tensor<string, []>("op_9570_cast_fp16")];
+            tensor<string, []> var_9572_equation_0 = const()[name = tensor<string, []>("op_9572_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9572_cast_fp16 = einsum(equation = var_9572_equation_0, values = (var_9274_cast_fp16, var_9516_cast_fp16))[name = tensor<string, []>("op_9572_cast_fp16")];
+            tensor<string, []> var_9574_equation_0 = const()[name = tensor<string, []>("op_9574_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9574_cast_fp16 = einsum(equation = var_9574_equation_0, values = (var_9278_cast_fp16, var_9517_cast_fp16))[name = tensor<string, []>("op_9574_cast_fp16")];
+            tensor<string, []> var_9576_equation_0 = const()[name = tensor<string, []>("op_9576_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9576_cast_fp16 = einsum(equation = var_9576_equation_0, values = (var_9278_cast_fp16, var_9518_cast_fp16))[name = tensor<string, []>("op_9576_cast_fp16")];
+            tensor<string, []> var_9578_equation_0 = const()[name = tensor<string, []>("op_9578_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9578_cast_fp16 = einsum(equation = var_9578_equation_0, values = (var_9278_cast_fp16, var_9519_cast_fp16))[name = tensor<string, []>("op_9578_cast_fp16")];
+            tensor<string, []> var_9580_equation_0 = const()[name = tensor<string, []>("op_9580_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9580_cast_fp16 = einsum(equation = var_9580_equation_0, values = (var_9278_cast_fp16, var_9520_cast_fp16))[name = tensor<string, []>("op_9580_cast_fp16")];
+            tensor<string, []> var_9582_equation_0 = const()[name = tensor<string, []>("op_9582_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9582_cast_fp16 = einsum(equation = var_9582_equation_0, values = (var_9282_cast_fp16, var_9521_cast_fp16))[name = tensor<string, []>("op_9582_cast_fp16")];
+            tensor<string, []> var_9584_equation_0 = const()[name = tensor<string, []>("op_9584_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9584_cast_fp16 = einsum(equation = var_9584_equation_0, values = (var_9282_cast_fp16, var_9522_cast_fp16))[name = tensor<string, []>("op_9584_cast_fp16")];
+            tensor<string, []> var_9586_equation_0 = const()[name = tensor<string, []>("op_9586_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9586_cast_fp16 = einsum(equation = var_9586_equation_0, values = (var_9282_cast_fp16, var_9523_cast_fp16))[name = tensor<string, []>("op_9586_cast_fp16")];
+            tensor<string, []> var_9588_equation_0 = const()[name = tensor<string, []>("op_9588_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9588_cast_fp16 = einsum(equation = var_9588_equation_0, values = (var_9282_cast_fp16, var_9524_cast_fp16))[name = tensor<string, []>("op_9588_cast_fp16")];
+            tensor<string, []> var_9590_equation_0 = const()[name = tensor<string, []>("op_9590_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9590_cast_fp16 = einsum(equation = var_9590_equation_0, values = (var_9286_cast_fp16, var_9525_cast_fp16))[name = tensor<string, []>("op_9590_cast_fp16")];
+            tensor<string, []> var_9592_equation_0 = const()[name = tensor<string, []>("op_9592_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9592_cast_fp16 = einsum(equation = var_9592_equation_0, values = (var_9286_cast_fp16, var_9526_cast_fp16))[name = tensor<string, []>("op_9592_cast_fp16")];
+            tensor<string, []> var_9594_equation_0 = const()[name = tensor<string, []>("op_9594_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9594_cast_fp16 = einsum(equation = var_9594_equation_0, values = (var_9286_cast_fp16, var_9527_cast_fp16))[name = tensor<string, []>("op_9594_cast_fp16")];
+            tensor<string, []> var_9596_equation_0 = const()[name = tensor<string, []>("op_9596_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9596_cast_fp16 = einsum(equation = var_9596_equation_0, values = (var_9286_cast_fp16, var_9528_cast_fp16))[name = tensor<string, []>("op_9596_cast_fp16")];
+            tensor<string, []> var_9598_equation_0 = const()[name = tensor<string, []>("op_9598_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9598_cast_fp16 = einsum(equation = var_9598_equation_0, values = (var_9290_cast_fp16, var_9529_cast_fp16))[name = tensor<string, []>("op_9598_cast_fp16")];
+            tensor<string, []> var_9600_equation_0 = const()[name = tensor<string, []>("op_9600_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9600_cast_fp16 = einsum(equation = var_9600_equation_0, values = (var_9290_cast_fp16, var_9530_cast_fp16))[name = tensor<string, []>("op_9600_cast_fp16")];
+            tensor<string, []> var_9602_equation_0 = const()[name = tensor<string, []>("op_9602_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9602_cast_fp16 = einsum(equation = var_9602_equation_0, values = (var_9290_cast_fp16, var_9531_cast_fp16))[name = tensor<string, []>("op_9602_cast_fp16")];
+            tensor<string, []> var_9604_equation_0 = const()[name = tensor<string, []>("op_9604_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9604_cast_fp16 = einsum(equation = var_9604_equation_0, values = (var_9290_cast_fp16, var_9532_cast_fp16))[name = tensor<string, []>("op_9604_cast_fp16")];
+            tensor<string, []> var_9606_equation_0 = const()[name = tensor<string, []>("op_9606_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9606_cast_fp16 = einsum(equation = var_9606_equation_0, values = (var_9294_cast_fp16, var_9533_cast_fp16))[name = tensor<string, []>("op_9606_cast_fp16")];
+            tensor<string, []> var_9608_equation_0 = const()[name = tensor<string, []>("op_9608_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9608_cast_fp16 = einsum(equation = var_9608_equation_0, values = (var_9294_cast_fp16, var_9534_cast_fp16))[name = tensor<string, []>("op_9608_cast_fp16")];
+            tensor<string, []> var_9610_equation_0 = const()[name = tensor<string, []>("op_9610_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9610_cast_fp16 = einsum(equation = var_9610_equation_0, values = (var_9294_cast_fp16, var_9535_cast_fp16))[name = tensor<string, []>("op_9610_cast_fp16")];
+            tensor<string, []> var_9612_equation_0 = const()[name = tensor<string, []>("op_9612_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9612_cast_fp16 = einsum(equation = var_9612_equation_0, values = (var_9294_cast_fp16, var_9536_cast_fp16))[name = tensor<string, []>("op_9612_cast_fp16")];
+            tensor<string, []> var_9614_equation_0 = const()[name = tensor<string, []>("op_9614_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9614_cast_fp16 = einsum(equation = var_9614_equation_0, values = (var_9298_cast_fp16, var_9537_cast_fp16))[name = tensor<string, []>("op_9614_cast_fp16")];
+            tensor<string, []> var_9616_equation_0 = const()[name = tensor<string, []>("op_9616_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9616_cast_fp16 = einsum(equation = var_9616_equation_0, values = (var_9298_cast_fp16, var_9538_cast_fp16))[name = tensor<string, []>("op_9616_cast_fp16")];
+            tensor<string, []> var_9618_equation_0 = const()[name = tensor<string, []>("op_9618_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9618_cast_fp16 = einsum(equation = var_9618_equation_0, values = (var_9298_cast_fp16, var_9539_cast_fp16))[name = tensor<string, []>("op_9618_cast_fp16")];
+            tensor<string, []> var_9620_equation_0 = const()[name = tensor<string, []>("op_9620_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9620_cast_fp16 = einsum(equation = var_9620_equation_0, values = (var_9298_cast_fp16, var_9540_cast_fp16))[name = tensor<string, []>("op_9620_cast_fp16")];
+            tensor<string, []> var_9622_equation_0 = const()[name = tensor<string, []>("op_9622_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9622_cast_fp16 = einsum(equation = var_9622_equation_0, values = (var_9302_cast_fp16, var_9541_cast_fp16))[name = tensor<string, []>("op_9622_cast_fp16")];
+            tensor<string, []> var_9624_equation_0 = const()[name = tensor<string, []>("op_9624_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9624_cast_fp16 = einsum(equation = var_9624_equation_0, values = (var_9302_cast_fp16, var_9542_cast_fp16))[name = tensor<string, []>("op_9624_cast_fp16")];
+            tensor<string, []> var_9626_equation_0 = const()[name = tensor<string, []>("op_9626_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9626_cast_fp16 = einsum(equation = var_9626_equation_0, values = (var_9302_cast_fp16, var_9543_cast_fp16))[name = tensor<string, []>("op_9626_cast_fp16")];
+            tensor<string, []> var_9628_equation_0 = const()[name = tensor<string, []>("op_9628_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9628_cast_fp16 = einsum(equation = var_9628_equation_0, values = (var_9302_cast_fp16, var_9544_cast_fp16))[name = tensor<string, []>("op_9628_cast_fp16")];
+            tensor<string, []> var_9630_equation_0 = const()[name = tensor<string, []>("op_9630_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9630_cast_fp16 = einsum(equation = var_9630_equation_0, values = (var_9306_cast_fp16, var_9545_cast_fp16))[name = tensor<string, []>("op_9630_cast_fp16")];
+            tensor<string, []> var_9632_equation_0 = const()[name = tensor<string, []>("op_9632_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9632_cast_fp16 = einsum(equation = var_9632_equation_0, values = (var_9306_cast_fp16, var_9546_cast_fp16))[name = tensor<string, []>("op_9632_cast_fp16")];
+            tensor<string, []> var_9634_equation_0 = const()[name = tensor<string, []>("op_9634_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9634_cast_fp16 = einsum(equation = var_9634_equation_0, values = (var_9306_cast_fp16, var_9547_cast_fp16))[name = tensor<string, []>("op_9634_cast_fp16")];
+            tensor<string, []> var_9636_equation_0 = const()[name = tensor<string, []>("op_9636_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9636_cast_fp16 = einsum(equation = var_9636_equation_0, values = (var_9306_cast_fp16, var_9548_cast_fp16))[name = tensor<string, []>("op_9636_cast_fp16")];
+            tensor<string, []> var_9638_equation_0 = const()[name = tensor<string, []>("op_9638_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9638_cast_fp16 = einsum(equation = var_9638_equation_0, values = (var_9310_cast_fp16, var_9549_cast_fp16))[name = tensor<string, []>("op_9638_cast_fp16")];
+            tensor<string, []> var_9640_equation_0 = const()[name = tensor<string, []>("op_9640_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9640_cast_fp16 = einsum(equation = var_9640_equation_0, values = (var_9310_cast_fp16, var_9550_cast_fp16))[name = tensor<string, []>("op_9640_cast_fp16")];
+            tensor<string, []> var_9642_equation_0 = const()[name = tensor<string, []>("op_9642_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9642_cast_fp16 = einsum(equation = var_9642_equation_0, values = (var_9310_cast_fp16, var_9551_cast_fp16))[name = tensor<string, []>("op_9642_cast_fp16")];
+            tensor<string, []> var_9644_equation_0 = const()[name = tensor<string, []>("op_9644_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9644_cast_fp16 = einsum(equation = var_9644_equation_0, values = (var_9310_cast_fp16, var_9552_cast_fp16))[name = tensor<string, []>("op_9644_cast_fp16")];
+            tensor<string, []> var_9646_equation_0 = const()[name = tensor<string, []>("op_9646_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9646_cast_fp16 = einsum(equation = var_9646_equation_0, values = (var_9314_cast_fp16, var_9553_cast_fp16))[name = tensor<string, []>("op_9646_cast_fp16")];
+            tensor<string, []> var_9648_equation_0 = const()[name = tensor<string, []>("op_9648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9648_cast_fp16 = einsum(equation = var_9648_equation_0, values = (var_9314_cast_fp16, var_9554_cast_fp16))[name = tensor<string, []>("op_9648_cast_fp16")];
+            tensor<string, []> var_9650_equation_0 = const()[name = tensor<string, []>("op_9650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9650_cast_fp16 = einsum(equation = var_9650_equation_0, values = (var_9314_cast_fp16, var_9555_cast_fp16))[name = tensor<string, []>("op_9650_cast_fp16")];
+            tensor<string, []> var_9652_equation_0 = const()[name = tensor<string, []>("op_9652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_9652_cast_fp16 = einsum(equation = var_9652_equation_0, values = (var_9314_cast_fp16, var_9556_cast_fp16))[name = tensor<string, []>("op_9652_cast_fp16")];
+            tensor<bool, []> var_9654_interleave_0 = const()[name = tensor<string, []>("op_9654_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9654_cast_fp16 = concat(axis = var_8765, interleave = var_9654_interleave_0, values = (var_9558_cast_fp16, var_9560_cast_fp16, var_9562_cast_fp16, var_9564_cast_fp16))[name = tensor<string, []>("op_9654_cast_fp16")];
+            tensor<bool, []> var_9656_interleave_0 = const()[name = tensor<string, []>("op_9656_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9656_cast_fp16 = concat(axis = var_8765, interleave = var_9656_interleave_0, values = (var_9566_cast_fp16, var_9568_cast_fp16, var_9570_cast_fp16, var_9572_cast_fp16))[name = tensor<string, []>("op_9656_cast_fp16")];
+            tensor<bool, []> var_9658_interleave_0 = const()[name = tensor<string, []>("op_9658_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9658_cast_fp16 = concat(axis = var_8765, interleave = var_9658_interleave_0, values = (var_9574_cast_fp16, var_9576_cast_fp16, var_9578_cast_fp16, var_9580_cast_fp16))[name = tensor<string, []>("op_9658_cast_fp16")];
+            tensor<bool, []> var_9660_interleave_0 = const()[name = tensor<string, []>("op_9660_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9660_cast_fp16 = concat(axis = var_8765, interleave = var_9660_interleave_0, values = (var_9582_cast_fp16, var_9584_cast_fp16, var_9586_cast_fp16, var_9588_cast_fp16))[name = tensor<string, []>("op_9660_cast_fp16")];
+            tensor<bool, []> var_9662_interleave_0 = const()[name = tensor<string, []>("op_9662_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9662_cast_fp16 = concat(axis = var_8765, interleave = var_9662_interleave_0, values = (var_9590_cast_fp16, var_9592_cast_fp16, var_9594_cast_fp16, var_9596_cast_fp16))[name = tensor<string, []>("op_9662_cast_fp16")];
+            tensor<bool, []> var_9664_interleave_0 = const()[name = tensor<string, []>("op_9664_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9664_cast_fp16 = concat(axis = var_8765, interleave = var_9664_interleave_0, values = (var_9598_cast_fp16, var_9600_cast_fp16, var_9602_cast_fp16, var_9604_cast_fp16))[name = tensor<string, []>("op_9664_cast_fp16")];
+            tensor<bool, []> var_9666_interleave_0 = const()[name = tensor<string, []>("op_9666_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9666_cast_fp16 = concat(axis = var_8765, interleave = var_9666_interleave_0, values = (var_9606_cast_fp16, var_9608_cast_fp16, var_9610_cast_fp16, var_9612_cast_fp16))[name = tensor<string, []>("op_9666_cast_fp16")];
+            tensor<bool, []> var_9668_interleave_0 = const()[name = tensor<string, []>("op_9668_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9668_cast_fp16 = concat(axis = var_8765, interleave = var_9668_interleave_0, values = (var_9614_cast_fp16, var_9616_cast_fp16, var_9618_cast_fp16, var_9620_cast_fp16))[name = tensor<string, []>("op_9668_cast_fp16")];
+            tensor<bool, []> var_9670_interleave_0 = const()[name = tensor<string, []>("op_9670_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9670_cast_fp16 = concat(axis = var_8765, interleave = var_9670_interleave_0, values = (var_9622_cast_fp16, var_9624_cast_fp16, var_9626_cast_fp16, var_9628_cast_fp16))[name = tensor<string, []>("op_9670_cast_fp16")];
+            tensor<bool, []> var_9672_interleave_0 = const()[name = tensor<string, []>("op_9672_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9672_cast_fp16 = concat(axis = var_8765, interleave = var_9672_interleave_0, values = (var_9630_cast_fp16, var_9632_cast_fp16, var_9634_cast_fp16, var_9636_cast_fp16))[name = tensor<string, []>("op_9672_cast_fp16")];
+            tensor<bool, []> var_9674_interleave_0 = const()[name = tensor<string, []>("op_9674_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9674_cast_fp16 = concat(axis = var_8765, interleave = var_9674_interleave_0, values = (var_9638_cast_fp16, var_9640_cast_fp16, var_9642_cast_fp16, var_9644_cast_fp16))[name = tensor<string, []>("op_9674_cast_fp16")];
+            tensor<bool, []> var_9676_interleave_0 = const()[name = tensor<string, []>("op_9676_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_9676_cast_fp16 = concat(axis = var_8765, interleave = var_9676_interleave_0, values = (var_9646_cast_fp16, var_9648_cast_fp16, var_9650_cast_fp16, var_9652_cast_fp16))[name = tensor<string, []>("op_9676_cast_fp16")];
+            tensor<bool, []> input_73_interleave_0 = const()[name = tensor<string, []>("input_73_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_73_cast_fp16 = concat(axis = var_8782, interleave = input_73_interleave_0, values = (var_9654_cast_fp16, var_9656_cast_fp16, var_9658_cast_fp16, var_9660_cast_fp16, var_9662_cast_fp16, var_9664_cast_fp16, var_9666_cast_fp16, var_9668_cast_fp16, var_9670_cast_fp16, var_9672_cast_fp16, var_9674_cast_fp16, var_9676_cast_fp16))[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<int32, [2]> var_9681 = const()[name = tensor<string, []>("op_9681"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_9683 = const()[name = tensor<string, []>("op_9683"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_39_pad_type_0 = const()[name = tensor<string, []>("obj_39_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_39_pad_0 = const()[name = tensor<string, []>("obj_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137340288)))];
+            tensor<fp16, [768]> layers_9_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138520000)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_39_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_bias_to_fp16, dilations = var_9683, groups = var_8782, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = var_9681, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("obj_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> var_9689 = const()[name = tensor<string, []>("op_9689"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_39_cast_fp16 = reduce_mean(axes = var_9689, keep_dims = var_8783, x = inputs_39_cast_fp16)[name = tensor<string, []>("channels_mean_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_39_cast_fp16 = sub(x = inputs_39_cast_fp16, y = channels_mean_39_cast_fp16)[name = tensor<string, []>("zero_mean_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = zero_mean_39_cast_fp16)[name = tensor<string, []>("zero_mean_sq_39_cast_fp16")];
+            tensor<int32, [1]> var_9693 = const()[name = tensor<string, []>("op_9693"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_9694_cast_fp16 = reduce_mean(axes = var_9693, keep_dims = var_8783, x = zero_mean_sq_39_cast_fp16)[name = tensor<string, []>("op_9694_cast_fp16")];
+            tensor<fp16, []> var_9695_to_fp16 = const()[name = tensor<string, []>("op_9695_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_9696_cast_fp16 = add(x = var_9694_cast_fp16, y = var_9695_to_fp16)[name = tensor<string, []>("op_9696_cast_fp16")];
+            tensor<fp16, []> denom_39_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_39_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0_to_fp16, x = var_9696_cast_fp16)[name = tensor<string, []>("denom_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = denom_39_cast_fp16)[name = tensor<string, []>("out_39_cast_fp16")];
+            tensor<fp16, [768]> input_75_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_75_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138521600)))];
+            tensor<fp16, [768]> input_75_beta_0_to_fp16 = const()[name = tensor<string, []>("input_75_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138523200)))];
+            tensor<fp16, []> input_75_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_75_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<int32, [2]> var_9707 = const()[name = tensor<string, []>("op_9707"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_9709 = const()[name = tensor<string, []>("op_9709"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_77_pad_type_0 = const()[name = tensor<string, []>("input_77_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_77_pad_0 = const()[name = tensor<string, []>("input_77_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138524800)))];
+            tensor<fp16, [3072]> layers_9_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143243456)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_77_cast_fp16 = conv(bias = layers_9_fc1_bias_to_fp16, dilations = var_9709, groups = var_8782, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = var_9707, weight = layers_9_fc1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_mode_0 = const()[name = tensor<string, []>("input_79_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<int32, [2]> var_9715 = const()[name = tensor<string, []>("op_9715"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_9717 = const()[name = tensor<string, []>("op_9717"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_23_pad_type_0 = const()[name = tensor<string, []>("hidden_states_23_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_23_pad_0 = const()[name = tensor<string, []>("hidden_states_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143249664)))];
+            tensor<fp16, [768]> layers_9_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147968320)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_23_cast_fp16 = conv(bias = layers_9_fc2_bias_to_fp16, dilations = var_9717, groups = var_8782, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = var_9715, weight = layers_9_fc2_weight_to_fp16, x = input_79_cast_fp16)[name = tensor<string, []>("hidden_states_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, []> var_9724 = const()[name = tensor<string, []>("op_9724"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_9741 = const()[name = tensor<string, []>("op_9741"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_9742 = const()[name = tensor<string, []>("op_9742"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_9752 = const()[name = tensor<string, []>("op_9752"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_41_cast_fp16 = reduce_mean(axes = var_9752, keep_dims = var_9742, x = inputs_41_cast_fp16)[name = tensor<string, []>("channels_mean_41_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_41_cast_fp16 = sub(x = inputs_41_cast_fp16, y = channels_mean_41_cast_fp16)[name = tensor<string, []>("zero_mean_41_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = zero_mean_41_cast_fp16)[name = tensor<string, []>("zero_mean_sq_41_cast_fp16")];
+            tensor<int32, [1]> var_9756 = const()[name = tensor<string, []>("op_9756"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_9757_cast_fp16 = reduce_mean(axes = var_9756, keep_dims = var_9742, x = zero_mean_sq_41_cast_fp16)[name = tensor<string, []>("op_9757_cast_fp16")];
+            tensor<fp16, []> var_9758_to_fp16 = const()[name = tensor<string, []>("op_9758_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_9759_cast_fp16 = add(x = var_9757_cast_fp16, y = var_9758_to_fp16)[name = tensor<string, []>("op_9759_cast_fp16")];
+            tensor<fp16, []> denom_41_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_41_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0_to_fp16, x = var_9759_cast_fp16)[name = tensor<string, []>("denom_41_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = denom_41_cast_fp16)[name = tensor<string, []>("out_41_cast_fp16")];
+            tensor<fp16, [768]> obj_41_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_41_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147969920)))];
+            tensor<fp16, [768]> obj_41_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_41_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147971520)))];
+            tensor<fp16, []> obj_41_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_41_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor<string, []>("obj_41_cast_fp16")];
+            tensor<int32, [2]> var_9774 = const()[name = tensor<string, []>("op_9774"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_9776 = const()[name = tensor<string, []>("op_9776"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_21_pad_type_0 = const()[name = tensor<string, []>("query_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_21_pad_0 = const()[name = tensor<string, []>("query_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147973120)))];
+            tensor<fp16, [768]> layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(149152832)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_21_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_bias_to_fp16, dilations = var_9776, groups = var_9741, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = var_9774, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor<string, []>("query_21_cast_fp16")];
+            tensor<int32, [2]> var_9780 = const()[name = tensor<string, []>("op_9780"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_9782 = const()[name = tensor<string, []>("op_9782"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_21_pad_type_0 = const()[name = tensor<string, []>("key_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_21_pad_0 = const()[name = tensor<string, []>("key_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(149154432)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_21_cast_fp16 = conv(dilations = var_9782, groups = var_9741, pad = key_21_pad_0, pad_type = key_21_pad_type_0, strides = var_9780, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor<string, []>("key_21_cast_fp16")];
+            tensor<int32, [2]> var_9787 = const()[name = tensor<string, []>("op_9787"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_9789 = const()[name = tensor<string, []>("op_9789"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_21_pad_type_0 = const()[name = tensor<string, []>("value_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_21_pad_0 = const()[name = tensor<string, []>("value_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(150334144)))];
+            tensor<fp16, [768]> layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151513856)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_21_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_bias_to_fp16, dilations = var_9789, groups = var_9741, pad = value_21_pad_0, pad_type = value_21_pad_type_0, strides = var_9787, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor<string, []>("value_21_cast_fp16")];
+            tensor<int32, [4]> var_9796_begin_0 = const()[name = tensor<string, []>("op_9796_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9796_end_0 = const()[name = tensor<string, []>("op_9796_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9796_end_mask_0 = const()[name = tensor<string, []>("op_9796_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9796_cast_fp16 = slice_by_index(begin = var_9796_begin_0, end = var_9796_end_0, end_mask = var_9796_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9796_cast_fp16")];
+            tensor<int32, [4]> var_9800_begin_0 = const()[name = tensor<string, []>("op_9800_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_9800_end_0 = const()[name = tensor<string, []>("op_9800_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_9800_end_mask_0 = const()[name = tensor<string, []>("op_9800_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9800_cast_fp16 = slice_by_index(begin = var_9800_begin_0, end = var_9800_end_0, end_mask = var_9800_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9800_cast_fp16")];
+            tensor<int32, [4]> var_9804_begin_0 = const()[name = tensor<string, []>("op_9804_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_9804_end_0 = const()[name = tensor<string, []>("op_9804_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_9804_end_mask_0 = const()[name = tensor<string, []>("op_9804_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9804_cast_fp16 = slice_by_index(begin = var_9804_begin_0, end = var_9804_end_0, end_mask = var_9804_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9804_cast_fp16")];
+            tensor<int32, [4]> var_9808_begin_0 = const()[name = tensor<string, []>("op_9808_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_9808_end_0 = const()[name = tensor<string, []>("op_9808_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_9808_end_mask_0 = const()[name = tensor<string, []>("op_9808_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9808_cast_fp16 = slice_by_index(begin = var_9808_begin_0, end = var_9808_end_0, end_mask = var_9808_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9808_cast_fp16")];
+            tensor<int32, [4]> var_9812_begin_0 = const()[name = tensor<string, []>("op_9812_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_9812_end_0 = const()[name = tensor<string, []>("op_9812_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_9812_end_mask_0 = const()[name = tensor<string, []>("op_9812_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9812_cast_fp16 = slice_by_index(begin = var_9812_begin_0, end = var_9812_end_0, end_mask = var_9812_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9812_cast_fp16")];
+            tensor<int32, [4]> var_9816_begin_0 = const()[name = tensor<string, []>("op_9816_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_9816_end_0 = const()[name = tensor<string, []>("op_9816_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_9816_end_mask_0 = const()[name = tensor<string, []>("op_9816_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9816_cast_fp16 = slice_by_index(begin = var_9816_begin_0, end = var_9816_end_0, end_mask = var_9816_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9816_cast_fp16")];
+            tensor<int32, [4]> var_9820_begin_0 = const()[name = tensor<string, []>("op_9820_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_9820_end_0 = const()[name = tensor<string, []>("op_9820_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_9820_end_mask_0 = const()[name = tensor<string, []>("op_9820_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9820_cast_fp16 = slice_by_index(begin = var_9820_begin_0, end = var_9820_end_0, end_mask = var_9820_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9820_cast_fp16")];
+            tensor<int32, [4]> var_9824_begin_0 = const()[name = tensor<string, []>("op_9824_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_9824_end_0 = const()[name = tensor<string, []>("op_9824_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_9824_end_mask_0 = const()[name = tensor<string, []>("op_9824_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9824_cast_fp16 = slice_by_index(begin = var_9824_begin_0, end = var_9824_end_0, end_mask = var_9824_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9824_cast_fp16")];
+            tensor<int32, [4]> var_9828_begin_0 = const()[name = tensor<string, []>("op_9828_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_9828_end_0 = const()[name = tensor<string, []>("op_9828_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_9828_end_mask_0 = const()[name = tensor<string, []>("op_9828_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9828_cast_fp16 = slice_by_index(begin = var_9828_begin_0, end = var_9828_end_0, end_mask = var_9828_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9828_cast_fp16")];
+            tensor<int32, [4]> var_9832_begin_0 = const()[name = tensor<string, []>("op_9832_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_9832_end_0 = const()[name = tensor<string, []>("op_9832_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_9832_end_mask_0 = const()[name = tensor<string, []>("op_9832_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9832_cast_fp16 = slice_by_index(begin = var_9832_begin_0, end = var_9832_end_0, end_mask = var_9832_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9832_cast_fp16")];
+            tensor<int32, [4]> var_9836_begin_0 = const()[name = tensor<string, []>("op_9836_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_9836_end_0 = const()[name = tensor<string, []>("op_9836_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_9836_end_mask_0 = const()[name = tensor<string, []>("op_9836_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9836_cast_fp16 = slice_by_index(begin = var_9836_begin_0, end = var_9836_end_0, end_mask = var_9836_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9836_cast_fp16")];
+            tensor<int32, [4]> var_9840_begin_0 = const()[name = tensor<string, []>("op_9840_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_9840_end_0 = const()[name = tensor<string, []>("op_9840_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_9840_end_mask_0 = const()[name = tensor<string, []>("op_9840_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_9840_cast_fp16 = slice_by_index(begin = var_9840_begin_0, end = var_9840_end_0, end_mask = var_9840_end_mask_0, x = query_21_cast_fp16)[name = tensor<string, []>("op_9840_cast_fp16")];
+            tensor<int32, [4]> var_9849_begin_0 = const()[name = tensor<string, []>("op_9849_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9849_end_0 = const()[name = tensor<string, []>("op_9849_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9849_end_mask_0 = const()[name = tensor<string, []>("op_9849_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9849_cast_fp16 = slice_by_index(begin = var_9849_begin_0, end = var_9849_end_0, end_mask = var_9849_end_mask_0, x = var_9796_cast_fp16)[name = tensor<string, []>("op_9849_cast_fp16")];
+            tensor<int32, [4]> var_9856_begin_0 = const()[name = tensor<string, []>("op_9856_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9856_end_0 = const()[name = tensor<string, []>("op_9856_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9856_end_mask_0 = const()[name = tensor<string, []>("op_9856_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9856_cast_fp16 = slice_by_index(begin = var_9856_begin_0, end = var_9856_end_0, end_mask = var_9856_end_mask_0, x = var_9796_cast_fp16)[name = tensor<string, []>("op_9856_cast_fp16")];
+            tensor<int32, [4]> var_9863_begin_0 = const()[name = tensor<string, []>("op_9863_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9863_end_0 = const()[name = tensor<string, []>("op_9863_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9863_end_mask_0 = const()[name = tensor<string, []>("op_9863_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9863_cast_fp16 = slice_by_index(begin = var_9863_begin_0, end = var_9863_end_0, end_mask = var_9863_end_mask_0, x = var_9796_cast_fp16)[name = tensor<string, []>("op_9863_cast_fp16")];
+            tensor<int32, [4]> var_9870_begin_0 = const()[name = tensor<string, []>("op_9870_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9870_end_0 = const()[name = tensor<string, []>("op_9870_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9870_end_mask_0 = const()[name = tensor<string, []>("op_9870_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9870_cast_fp16 = slice_by_index(begin = var_9870_begin_0, end = var_9870_end_0, end_mask = var_9870_end_mask_0, x = var_9796_cast_fp16)[name = tensor<string, []>("op_9870_cast_fp16")];
+            tensor<int32, [4]> var_9877_begin_0 = const()[name = tensor<string, []>("op_9877_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9877_end_0 = const()[name = tensor<string, []>("op_9877_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9877_end_mask_0 = const()[name = tensor<string, []>("op_9877_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9877_cast_fp16 = slice_by_index(begin = var_9877_begin_0, end = var_9877_end_0, end_mask = var_9877_end_mask_0, x = var_9800_cast_fp16)[name = tensor<string, []>("op_9877_cast_fp16")];
+            tensor<int32, [4]> var_9884_begin_0 = const()[name = tensor<string, []>("op_9884_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9884_end_0 = const()[name = tensor<string, []>("op_9884_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9884_end_mask_0 = const()[name = tensor<string, []>("op_9884_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9884_cast_fp16 = slice_by_index(begin = var_9884_begin_0, end = var_9884_end_0, end_mask = var_9884_end_mask_0, x = var_9800_cast_fp16)[name = tensor<string, []>("op_9884_cast_fp16")];
+            tensor<int32, [4]> var_9891_begin_0 = const()[name = tensor<string, []>("op_9891_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9891_end_0 = const()[name = tensor<string, []>("op_9891_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9891_end_mask_0 = const()[name = tensor<string, []>("op_9891_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9891_cast_fp16 = slice_by_index(begin = var_9891_begin_0, end = var_9891_end_0, end_mask = var_9891_end_mask_0, x = var_9800_cast_fp16)[name = tensor<string, []>("op_9891_cast_fp16")];
+            tensor<int32, [4]> var_9898_begin_0 = const()[name = tensor<string, []>("op_9898_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9898_end_0 = const()[name = tensor<string, []>("op_9898_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9898_end_mask_0 = const()[name = tensor<string, []>("op_9898_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9898_cast_fp16 = slice_by_index(begin = var_9898_begin_0, end = var_9898_end_0, end_mask = var_9898_end_mask_0, x = var_9800_cast_fp16)[name = tensor<string, []>("op_9898_cast_fp16")];
+            tensor<int32, [4]> var_9905_begin_0 = const()[name = tensor<string, []>("op_9905_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9905_end_0 = const()[name = tensor<string, []>("op_9905_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9905_end_mask_0 = const()[name = tensor<string, []>("op_9905_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9905_cast_fp16 = slice_by_index(begin = var_9905_begin_0, end = var_9905_end_0, end_mask = var_9905_end_mask_0, x = var_9804_cast_fp16)[name = tensor<string, []>("op_9905_cast_fp16")];
+            tensor<int32, [4]> var_9912_begin_0 = const()[name = tensor<string, []>("op_9912_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9912_end_0 = const()[name = tensor<string, []>("op_9912_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9912_end_mask_0 = const()[name = tensor<string, []>("op_9912_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9912_cast_fp16 = slice_by_index(begin = var_9912_begin_0, end = var_9912_end_0, end_mask = var_9912_end_mask_0, x = var_9804_cast_fp16)[name = tensor<string, []>("op_9912_cast_fp16")];
+            tensor<int32, [4]> var_9919_begin_0 = const()[name = tensor<string, []>("op_9919_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9919_end_0 = const()[name = tensor<string, []>("op_9919_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9919_end_mask_0 = const()[name = tensor<string, []>("op_9919_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9919_cast_fp16 = slice_by_index(begin = var_9919_begin_0, end = var_9919_end_0, end_mask = var_9919_end_mask_0, x = var_9804_cast_fp16)[name = tensor<string, []>("op_9919_cast_fp16")];
+            tensor<int32, [4]> var_9926_begin_0 = const()[name = tensor<string, []>("op_9926_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9926_end_0 = const()[name = tensor<string, []>("op_9926_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9926_end_mask_0 = const()[name = tensor<string, []>("op_9926_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9926_cast_fp16 = slice_by_index(begin = var_9926_begin_0, end = var_9926_end_0, end_mask = var_9926_end_mask_0, x = var_9804_cast_fp16)[name = tensor<string, []>("op_9926_cast_fp16")];
+            tensor<int32, [4]> var_9933_begin_0 = const()[name = tensor<string, []>("op_9933_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9933_end_0 = const()[name = tensor<string, []>("op_9933_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9933_end_mask_0 = const()[name = tensor<string, []>("op_9933_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9933_cast_fp16 = slice_by_index(begin = var_9933_begin_0, end = var_9933_end_0, end_mask = var_9933_end_mask_0, x = var_9808_cast_fp16)[name = tensor<string, []>("op_9933_cast_fp16")];
+            tensor<int32, [4]> var_9940_begin_0 = const()[name = tensor<string, []>("op_9940_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9940_end_0 = const()[name = tensor<string, []>("op_9940_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9940_end_mask_0 = const()[name = tensor<string, []>("op_9940_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9940_cast_fp16 = slice_by_index(begin = var_9940_begin_0, end = var_9940_end_0, end_mask = var_9940_end_mask_0, x = var_9808_cast_fp16)[name = tensor<string, []>("op_9940_cast_fp16")];
+            tensor<int32, [4]> var_9947_begin_0 = const()[name = tensor<string, []>("op_9947_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9947_end_0 = const()[name = tensor<string, []>("op_9947_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9947_end_mask_0 = const()[name = tensor<string, []>("op_9947_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9947_cast_fp16 = slice_by_index(begin = var_9947_begin_0, end = var_9947_end_0, end_mask = var_9947_end_mask_0, x = var_9808_cast_fp16)[name = tensor<string, []>("op_9947_cast_fp16")];
+            tensor<int32, [4]> var_9954_begin_0 = const()[name = tensor<string, []>("op_9954_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9954_end_0 = const()[name = tensor<string, []>("op_9954_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9954_end_mask_0 = const()[name = tensor<string, []>("op_9954_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9954_cast_fp16 = slice_by_index(begin = var_9954_begin_0, end = var_9954_end_0, end_mask = var_9954_end_mask_0, x = var_9808_cast_fp16)[name = tensor<string, []>("op_9954_cast_fp16")];
+            tensor<int32, [4]> var_9961_begin_0 = const()[name = tensor<string, []>("op_9961_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9961_end_0 = const()[name = tensor<string, []>("op_9961_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9961_end_mask_0 = const()[name = tensor<string, []>("op_9961_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9961_cast_fp16 = slice_by_index(begin = var_9961_begin_0, end = var_9961_end_0, end_mask = var_9961_end_mask_0, x = var_9812_cast_fp16)[name = tensor<string, []>("op_9961_cast_fp16")];
+            tensor<int32, [4]> var_9968_begin_0 = const()[name = tensor<string, []>("op_9968_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9968_end_0 = const()[name = tensor<string, []>("op_9968_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9968_end_mask_0 = const()[name = tensor<string, []>("op_9968_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9968_cast_fp16 = slice_by_index(begin = var_9968_begin_0, end = var_9968_end_0, end_mask = var_9968_end_mask_0, x = var_9812_cast_fp16)[name = tensor<string, []>("op_9968_cast_fp16")];
+            tensor<int32, [4]> var_9975_begin_0 = const()[name = tensor<string, []>("op_9975_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_9975_end_0 = const()[name = tensor<string, []>("op_9975_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_9975_end_mask_0 = const()[name = tensor<string, []>("op_9975_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9975_cast_fp16 = slice_by_index(begin = var_9975_begin_0, end = var_9975_end_0, end_mask = var_9975_end_mask_0, x = var_9812_cast_fp16)[name = tensor<string, []>("op_9975_cast_fp16")];
+            tensor<int32, [4]> var_9982_begin_0 = const()[name = tensor<string, []>("op_9982_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_9982_end_0 = const()[name = tensor<string, []>("op_9982_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_9982_end_mask_0 = const()[name = tensor<string, []>("op_9982_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9982_cast_fp16 = slice_by_index(begin = var_9982_begin_0, end = var_9982_end_0, end_mask = var_9982_end_mask_0, x = var_9812_cast_fp16)[name = tensor<string, []>("op_9982_cast_fp16")];
+            tensor<int32, [4]> var_9989_begin_0 = const()[name = tensor<string, []>("op_9989_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_9989_end_0 = const()[name = tensor<string, []>("op_9989_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_9989_end_mask_0 = const()[name = tensor<string, []>("op_9989_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9989_cast_fp16 = slice_by_index(begin = var_9989_begin_0, end = var_9989_end_0, end_mask = var_9989_end_mask_0, x = var_9816_cast_fp16)[name = tensor<string, []>("op_9989_cast_fp16")];
+            tensor<int32, [4]> var_9996_begin_0 = const()[name = tensor<string, []>("op_9996_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_9996_end_0 = const()[name = tensor<string, []>("op_9996_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_9996_end_mask_0 = const()[name = tensor<string, []>("op_9996_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_9996_cast_fp16 = slice_by_index(begin = var_9996_begin_0, end = var_9996_end_0, end_mask = var_9996_end_mask_0, x = var_9816_cast_fp16)[name = tensor<string, []>("op_9996_cast_fp16")];
+            tensor<int32, [4]> var_10003_begin_0 = const()[name = tensor<string, []>("op_10003_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10003_end_0 = const()[name = tensor<string, []>("op_10003_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10003_end_mask_0 = const()[name = tensor<string, []>("op_10003_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10003_cast_fp16 = slice_by_index(begin = var_10003_begin_0, end = var_10003_end_0, end_mask = var_10003_end_mask_0, x = var_9816_cast_fp16)[name = tensor<string, []>("op_10003_cast_fp16")];
+            tensor<int32, [4]> var_10010_begin_0 = const()[name = tensor<string, []>("op_10010_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10010_end_0 = const()[name = tensor<string, []>("op_10010_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10010_end_mask_0 = const()[name = tensor<string, []>("op_10010_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10010_cast_fp16 = slice_by_index(begin = var_10010_begin_0, end = var_10010_end_0, end_mask = var_10010_end_mask_0, x = var_9816_cast_fp16)[name = tensor<string, []>("op_10010_cast_fp16")];
+            tensor<int32, [4]> var_10017_begin_0 = const()[name = tensor<string, []>("op_10017_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10017_end_0 = const()[name = tensor<string, []>("op_10017_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10017_end_mask_0 = const()[name = tensor<string, []>("op_10017_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10017_cast_fp16 = slice_by_index(begin = var_10017_begin_0, end = var_10017_end_0, end_mask = var_10017_end_mask_0, x = var_9820_cast_fp16)[name = tensor<string, []>("op_10017_cast_fp16")];
+            tensor<int32, [4]> var_10024_begin_0 = const()[name = tensor<string, []>("op_10024_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10024_end_0 = const()[name = tensor<string, []>("op_10024_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10024_end_mask_0 = const()[name = tensor<string, []>("op_10024_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10024_cast_fp16 = slice_by_index(begin = var_10024_begin_0, end = var_10024_end_0, end_mask = var_10024_end_mask_0, x = var_9820_cast_fp16)[name = tensor<string, []>("op_10024_cast_fp16")];
+            tensor<int32, [4]> var_10031_begin_0 = const()[name = tensor<string, []>("op_10031_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10031_end_0 = const()[name = tensor<string, []>("op_10031_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10031_end_mask_0 = const()[name = tensor<string, []>("op_10031_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10031_cast_fp16 = slice_by_index(begin = var_10031_begin_0, end = var_10031_end_0, end_mask = var_10031_end_mask_0, x = var_9820_cast_fp16)[name = tensor<string, []>("op_10031_cast_fp16")];
+            tensor<int32, [4]> var_10038_begin_0 = const()[name = tensor<string, []>("op_10038_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10038_end_0 = const()[name = tensor<string, []>("op_10038_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10038_end_mask_0 = const()[name = tensor<string, []>("op_10038_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10038_cast_fp16 = slice_by_index(begin = var_10038_begin_0, end = var_10038_end_0, end_mask = var_10038_end_mask_0, x = var_9820_cast_fp16)[name = tensor<string, []>("op_10038_cast_fp16")];
+            tensor<int32, [4]> var_10045_begin_0 = const()[name = tensor<string, []>("op_10045_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10045_end_0 = const()[name = tensor<string, []>("op_10045_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10045_end_mask_0 = const()[name = tensor<string, []>("op_10045_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10045_cast_fp16 = slice_by_index(begin = var_10045_begin_0, end = var_10045_end_0, end_mask = var_10045_end_mask_0, x = var_9824_cast_fp16)[name = tensor<string, []>("op_10045_cast_fp16")];
+            tensor<int32, [4]> var_10052_begin_0 = const()[name = tensor<string, []>("op_10052_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10052_end_0 = const()[name = tensor<string, []>("op_10052_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10052_end_mask_0 = const()[name = tensor<string, []>("op_10052_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10052_cast_fp16 = slice_by_index(begin = var_10052_begin_0, end = var_10052_end_0, end_mask = var_10052_end_mask_0, x = var_9824_cast_fp16)[name = tensor<string, []>("op_10052_cast_fp16")];
+            tensor<int32, [4]> var_10059_begin_0 = const()[name = tensor<string, []>("op_10059_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10059_end_0 = const()[name = tensor<string, []>("op_10059_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10059_end_mask_0 = const()[name = tensor<string, []>("op_10059_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10059_cast_fp16 = slice_by_index(begin = var_10059_begin_0, end = var_10059_end_0, end_mask = var_10059_end_mask_0, x = var_9824_cast_fp16)[name = tensor<string, []>("op_10059_cast_fp16")];
+            tensor<int32, [4]> var_10066_begin_0 = const()[name = tensor<string, []>("op_10066_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10066_end_0 = const()[name = tensor<string, []>("op_10066_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10066_end_mask_0 = const()[name = tensor<string, []>("op_10066_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10066_cast_fp16 = slice_by_index(begin = var_10066_begin_0, end = var_10066_end_0, end_mask = var_10066_end_mask_0, x = var_9824_cast_fp16)[name = tensor<string, []>("op_10066_cast_fp16")];
+            tensor<int32, [4]> var_10073_begin_0 = const()[name = tensor<string, []>("op_10073_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10073_end_0 = const()[name = tensor<string, []>("op_10073_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10073_end_mask_0 = const()[name = tensor<string, []>("op_10073_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10073_cast_fp16 = slice_by_index(begin = var_10073_begin_0, end = var_10073_end_0, end_mask = var_10073_end_mask_0, x = var_9828_cast_fp16)[name = tensor<string, []>("op_10073_cast_fp16")];
+            tensor<int32, [4]> var_10080_begin_0 = const()[name = tensor<string, []>("op_10080_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10080_end_0 = const()[name = tensor<string, []>("op_10080_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10080_end_mask_0 = const()[name = tensor<string, []>("op_10080_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10080_cast_fp16 = slice_by_index(begin = var_10080_begin_0, end = var_10080_end_0, end_mask = var_10080_end_mask_0, x = var_9828_cast_fp16)[name = tensor<string, []>("op_10080_cast_fp16")];
+            tensor<int32, [4]> var_10087_begin_0 = const()[name = tensor<string, []>("op_10087_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10087_end_0 = const()[name = tensor<string, []>("op_10087_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10087_end_mask_0 = const()[name = tensor<string, []>("op_10087_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10087_cast_fp16 = slice_by_index(begin = var_10087_begin_0, end = var_10087_end_0, end_mask = var_10087_end_mask_0, x = var_9828_cast_fp16)[name = tensor<string, []>("op_10087_cast_fp16")];
+            tensor<int32, [4]> var_10094_begin_0 = const()[name = tensor<string, []>("op_10094_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10094_end_0 = const()[name = tensor<string, []>("op_10094_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10094_end_mask_0 = const()[name = tensor<string, []>("op_10094_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10094_cast_fp16 = slice_by_index(begin = var_10094_begin_0, end = var_10094_end_0, end_mask = var_10094_end_mask_0, x = var_9828_cast_fp16)[name = tensor<string, []>("op_10094_cast_fp16")];
+            tensor<int32, [4]> var_10101_begin_0 = const()[name = tensor<string, []>("op_10101_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10101_end_0 = const()[name = tensor<string, []>("op_10101_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10101_end_mask_0 = const()[name = tensor<string, []>("op_10101_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10101_cast_fp16 = slice_by_index(begin = var_10101_begin_0, end = var_10101_end_0, end_mask = var_10101_end_mask_0, x = var_9832_cast_fp16)[name = tensor<string, []>("op_10101_cast_fp16")];
+            tensor<int32, [4]> var_10108_begin_0 = const()[name = tensor<string, []>("op_10108_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10108_end_0 = const()[name = tensor<string, []>("op_10108_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10108_end_mask_0 = const()[name = tensor<string, []>("op_10108_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10108_cast_fp16 = slice_by_index(begin = var_10108_begin_0, end = var_10108_end_0, end_mask = var_10108_end_mask_0, x = var_9832_cast_fp16)[name = tensor<string, []>("op_10108_cast_fp16")];
+            tensor<int32, [4]> var_10115_begin_0 = const()[name = tensor<string, []>("op_10115_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10115_end_0 = const()[name = tensor<string, []>("op_10115_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10115_end_mask_0 = const()[name = tensor<string, []>("op_10115_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10115_cast_fp16 = slice_by_index(begin = var_10115_begin_0, end = var_10115_end_0, end_mask = var_10115_end_mask_0, x = var_9832_cast_fp16)[name = tensor<string, []>("op_10115_cast_fp16")];
+            tensor<int32, [4]> var_10122_begin_0 = const()[name = tensor<string, []>("op_10122_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10122_end_0 = const()[name = tensor<string, []>("op_10122_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10122_end_mask_0 = const()[name = tensor<string, []>("op_10122_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10122_cast_fp16 = slice_by_index(begin = var_10122_begin_0, end = var_10122_end_0, end_mask = var_10122_end_mask_0, x = var_9832_cast_fp16)[name = tensor<string, []>("op_10122_cast_fp16")];
+            tensor<int32, [4]> var_10129_begin_0 = const()[name = tensor<string, []>("op_10129_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10129_end_0 = const()[name = tensor<string, []>("op_10129_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10129_end_mask_0 = const()[name = tensor<string, []>("op_10129_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10129_cast_fp16 = slice_by_index(begin = var_10129_begin_0, end = var_10129_end_0, end_mask = var_10129_end_mask_0, x = var_9836_cast_fp16)[name = tensor<string, []>("op_10129_cast_fp16")];
+            tensor<int32, [4]> var_10136_begin_0 = const()[name = tensor<string, []>("op_10136_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10136_end_0 = const()[name = tensor<string, []>("op_10136_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10136_end_mask_0 = const()[name = tensor<string, []>("op_10136_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10136_cast_fp16 = slice_by_index(begin = var_10136_begin_0, end = var_10136_end_0, end_mask = var_10136_end_mask_0, x = var_9836_cast_fp16)[name = tensor<string, []>("op_10136_cast_fp16")];
+            tensor<int32, [4]> var_10143_begin_0 = const()[name = tensor<string, []>("op_10143_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10143_end_0 = const()[name = tensor<string, []>("op_10143_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10143_end_mask_0 = const()[name = tensor<string, []>("op_10143_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10143_cast_fp16 = slice_by_index(begin = var_10143_begin_0, end = var_10143_end_0, end_mask = var_10143_end_mask_0, x = var_9836_cast_fp16)[name = tensor<string, []>("op_10143_cast_fp16")];
+            tensor<int32, [4]> var_10150_begin_0 = const()[name = tensor<string, []>("op_10150_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10150_end_0 = const()[name = tensor<string, []>("op_10150_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10150_end_mask_0 = const()[name = tensor<string, []>("op_10150_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10150_cast_fp16 = slice_by_index(begin = var_10150_begin_0, end = var_10150_end_0, end_mask = var_10150_end_mask_0, x = var_9836_cast_fp16)[name = tensor<string, []>("op_10150_cast_fp16")];
+            tensor<int32, [4]> var_10157_begin_0 = const()[name = tensor<string, []>("op_10157_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10157_end_0 = const()[name = tensor<string, []>("op_10157_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10157_end_mask_0 = const()[name = tensor<string, []>("op_10157_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10157_cast_fp16 = slice_by_index(begin = var_10157_begin_0, end = var_10157_end_0, end_mask = var_10157_end_mask_0, x = var_9840_cast_fp16)[name = tensor<string, []>("op_10157_cast_fp16")];
+            tensor<int32, [4]> var_10164_begin_0 = const()[name = tensor<string, []>("op_10164_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10164_end_0 = const()[name = tensor<string, []>("op_10164_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10164_end_mask_0 = const()[name = tensor<string, []>("op_10164_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10164_cast_fp16 = slice_by_index(begin = var_10164_begin_0, end = var_10164_end_0, end_mask = var_10164_end_mask_0, x = var_9840_cast_fp16)[name = tensor<string, []>("op_10164_cast_fp16")];
+            tensor<int32, [4]> var_10171_begin_0 = const()[name = tensor<string, []>("op_10171_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10171_end_0 = const()[name = tensor<string, []>("op_10171_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10171_end_mask_0 = const()[name = tensor<string, []>("op_10171_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10171_cast_fp16 = slice_by_index(begin = var_10171_begin_0, end = var_10171_end_0, end_mask = var_10171_end_mask_0, x = var_9840_cast_fp16)[name = tensor<string, []>("op_10171_cast_fp16")];
+            tensor<int32, [4]> var_10178_begin_0 = const()[name = tensor<string, []>("op_10178_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10178_end_0 = const()[name = tensor<string, []>("op_10178_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10178_end_mask_0 = const()[name = tensor<string, []>("op_10178_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10178_cast_fp16 = slice_by_index(begin = var_10178_begin_0, end = var_10178_end_0, end_mask = var_10178_end_mask_0, x = var_9840_cast_fp16)[name = tensor<string, []>("op_10178_cast_fp16")];
+            tensor<int32, [4]> k_21_perm_0 = const()[name = tensor<string, []>("k_21_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_10183_begin_0 = const()[name = tensor<string, []>("op_10183_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10183_end_0 = const()[name = tensor<string, []>("op_10183_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_10183_end_mask_0 = const()[name = tensor<string, []>("op_10183_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_1 = transpose(perm = k_21_perm_0, x = key_21_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_10183_cast_fp16 = slice_by_index(begin = var_10183_begin_0, end = var_10183_end_0, end_mask = var_10183_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10183_cast_fp16")];
+            tensor<int32, [4]> var_10187_begin_0 = const()[name = tensor<string, []>("op_10187_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_10187_end_0 = const()[name = tensor<string, []>("op_10187_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_10187_end_mask_0 = const()[name = tensor<string, []>("op_10187_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10187_cast_fp16 = slice_by_index(begin = var_10187_begin_0, end = var_10187_end_0, end_mask = var_10187_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10187_cast_fp16")];
+            tensor<int32, [4]> var_10191_begin_0 = const()[name = tensor<string, []>("op_10191_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_10191_end_0 = const()[name = tensor<string, []>("op_10191_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_10191_end_mask_0 = const()[name = tensor<string, []>("op_10191_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10191_cast_fp16 = slice_by_index(begin = var_10191_begin_0, end = var_10191_end_0, end_mask = var_10191_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10191_cast_fp16")];
+            tensor<int32, [4]> var_10195_begin_0 = const()[name = tensor<string, []>("op_10195_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_10195_end_0 = const()[name = tensor<string, []>("op_10195_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_10195_end_mask_0 = const()[name = tensor<string, []>("op_10195_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10195_cast_fp16 = slice_by_index(begin = var_10195_begin_0, end = var_10195_end_0, end_mask = var_10195_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10195_cast_fp16")];
+            tensor<int32, [4]> var_10199_begin_0 = const()[name = tensor<string, []>("op_10199_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_10199_end_0 = const()[name = tensor<string, []>("op_10199_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_10199_end_mask_0 = const()[name = tensor<string, []>("op_10199_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10199_cast_fp16 = slice_by_index(begin = var_10199_begin_0, end = var_10199_end_0, end_mask = var_10199_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10199_cast_fp16")];
+            tensor<int32, [4]> var_10203_begin_0 = const()[name = tensor<string, []>("op_10203_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_10203_end_0 = const()[name = tensor<string, []>("op_10203_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_10203_end_mask_0 = const()[name = tensor<string, []>("op_10203_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10203_cast_fp16 = slice_by_index(begin = var_10203_begin_0, end = var_10203_end_0, end_mask = var_10203_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10203_cast_fp16")];
+            tensor<int32, [4]> var_10207_begin_0 = const()[name = tensor<string, []>("op_10207_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_10207_end_0 = const()[name = tensor<string, []>("op_10207_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_10207_end_mask_0 = const()[name = tensor<string, []>("op_10207_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10207_cast_fp16 = slice_by_index(begin = var_10207_begin_0, end = var_10207_end_0, end_mask = var_10207_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10207_cast_fp16")];
+            tensor<int32, [4]> var_10211_begin_0 = const()[name = tensor<string, []>("op_10211_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_10211_end_0 = const()[name = tensor<string, []>("op_10211_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_10211_end_mask_0 = const()[name = tensor<string, []>("op_10211_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10211_cast_fp16 = slice_by_index(begin = var_10211_begin_0, end = var_10211_end_0, end_mask = var_10211_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10211_cast_fp16")];
+            tensor<int32, [4]> var_10215_begin_0 = const()[name = tensor<string, []>("op_10215_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_10215_end_0 = const()[name = tensor<string, []>("op_10215_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_10215_end_mask_0 = const()[name = tensor<string, []>("op_10215_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10215_cast_fp16 = slice_by_index(begin = var_10215_begin_0, end = var_10215_end_0, end_mask = var_10215_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10215_cast_fp16")];
+            tensor<int32, [4]> var_10219_begin_0 = const()[name = tensor<string, []>("op_10219_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_10219_end_0 = const()[name = tensor<string, []>("op_10219_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_10219_end_mask_0 = const()[name = tensor<string, []>("op_10219_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10219_cast_fp16 = slice_by_index(begin = var_10219_begin_0, end = var_10219_end_0, end_mask = var_10219_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10219_cast_fp16")];
+            tensor<int32, [4]> var_10223_begin_0 = const()[name = tensor<string, []>("op_10223_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_10223_end_0 = const()[name = tensor<string, []>("op_10223_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_10223_end_mask_0 = const()[name = tensor<string, []>("op_10223_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10223_cast_fp16 = slice_by_index(begin = var_10223_begin_0, end = var_10223_end_0, end_mask = var_10223_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10223_cast_fp16")];
+            tensor<int32, [4]> var_10227_begin_0 = const()[name = tensor<string, []>("op_10227_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_10227_end_0 = const()[name = tensor<string, []>("op_10227_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_10227_end_mask_0 = const()[name = tensor<string, []>("op_10227_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_10227_cast_fp16 = slice_by_index(begin = var_10227_begin_0, end = var_10227_end_0, end_mask = var_10227_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_10227_cast_fp16")];
+            tensor<int32, [4]> var_10229_begin_0 = const()[name = tensor<string, []>("op_10229_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10229_end_0 = const()[name = tensor<string, []>("op_10229_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10229_end_mask_0 = const()[name = tensor<string, []>("op_10229_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10229_cast_fp16 = slice_by_index(begin = var_10229_begin_0, end = var_10229_end_0, end_mask = var_10229_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10229_cast_fp16")];
+            tensor<int32, [4]> var_10233_begin_0 = const()[name = tensor<string, []>("op_10233_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_10233_end_0 = const()[name = tensor<string, []>("op_10233_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_10233_end_mask_0 = const()[name = tensor<string, []>("op_10233_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10233_cast_fp16 = slice_by_index(begin = var_10233_begin_0, end = var_10233_end_0, end_mask = var_10233_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10233_cast_fp16")];
+            tensor<int32, [4]> var_10237_begin_0 = const()[name = tensor<string, []>("op_10237_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_10237_end_0 = const()[name = tensor<string, []>("op_10237_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_10237_end_mask_0 = const()[name = tensor<string, []>("op_10237_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10237_cast_fp16 = slice_by_index(begin = var_10237_begin_0, end = var_10237_end_0, end_mask = var_10237_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10237_cast_fp16")];
+            tensor<int32, [4]> var_10241_begin_0 = const()[name = tensor<string, []>("op_10241_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_10241_end_0 = const()[name = tensor<string, []>("op_10241_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_10241_end_mask_0 = const()[name = tensor<string, []>("op_10241_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10241_cast_fp16 = slice_by_index(begin = var_10241_begin_0, end = var_10241_end_0, end_mask = var_10241_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10241_cast_fp16")];
+            tensor<int32, [4]> var_10245_begin_0 = const()[name = tensor<string, []>("op_10245_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_10245_end_0 = const()[name = tensor<string, []>("op_10245_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_10245_end_mask_0 = const()[name = tensor<string, []>("op_10245_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10245_cast_fp16 = slice_by_index(begin = var_10245_begin_0, end = var_10245_end_0, end_mask = var_10245_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10245_cast_fp16")];
+            tensor<int32, [4]> var_10249_begin_0 = const()[name = tensor<string, []>("op_10249_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_10249_end_0 = const()[name = tensor<string, []>("op_10249_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_10249_end_mask_0 = const()[name = tensor<string, []>("op_10249_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10249_cast_fp16 = slice_by_index(begin = var_10249_begin_0, end = var_10249_end_0, end_mask = var_10249_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10249_cast_fp16")];
+            tensor<int32, [4]> var_10253_begin_0 = const()[name = tensor<string, []>("op_10253_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_10253_end_0 = const()[name = tensor<string, []>("op_10253_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_10253_end_mask_0 = const()[name = tensor<string, []>("op_10253_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10253_cast_fp16 = slice_by_index(begin = var_10253_begin_0, end = var_10253_end_0, end_mask = var_10253_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10253_cast_fp16")];
+            tensor<int32, [4]> var_10257_begin_0 = const()[name = tensor<string, []>("op_10257_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_10257_end_0 = const()[name = tensor<string, []>("op_10257_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_10257_end_mask_0 = const()[name = tensor<string, []>("op_10257_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10257_cast_fp16 = slice_by_index(begin = var_10257_begin_0, end = var_10257_end_0, end_mask = var_10257_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10257_cast_fp16")];
+            tensor<int32, [4]> var_10261_begin_0 = const()[name = tensor<string, []>("op_10261_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_10261_end_0 = const()[name = tensor<string, []>("op_10261_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_10261_end_mask_0 = const()[name = tensor<string, []>("op_10261_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10261_cast_fp16 = slice_by_index(begin = var_10261_begin_0, end = var_10261_end_0, end_mask = var_10261_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10261_cast_fp16")];
+            tensor<int32, [4]> var_10265_begin_0 = const()[name = tensor<string, []>("op_10265_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_10265_end_0 = const()[name = tensor<string, []>("op_10265_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_10265_end_mask_0 = const()[name = tensor<string, []>("op_10265_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10265_cast_fp16 = slice_by_index(begin = var_10265_begin_0, end = var_10265_end_0, end_mask = var_10265_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10265_cast_fp16")];
+            tensor<int32, [4]> var_10269_begin_0 = const()[name = tensor<string, []>("op_10269_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_10269_end_0 = const()[name = tensor<string, []>("op_10269_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_10269_end_mask_0 = const()[name = tensor<string, []>("op_10269_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10269_cast_fp16 = slice_by_index(begin = var_10269_begin_0, end = var_10269_end_0, end_mask = var_10269_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10269_cast_fp16")];
+            tensor<int32, [4]> var_10273_begin_0 = const()[name = tensor<string, []>("op_10273_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_10273_end_0 = const()[name = tensor<string, []>("op_10273_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_10273_end_mask_0 = const()[name = tensor<string, []>("op_10273_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10273_cast_fp16 = slice_by_index(begin = var_10273_begin_0, end = var_10273_end_0, end_mask = var_10273_end_mask_0, x = value_21_cast_fp16)[name = tensor<string, []>("op_10273_cast_fp16")];
+            tensor<string, []> var_10277_equation_0 = const()[name = tensor<string, []>("op_10277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10277_cast_fp16 = einsum(equation = var_10277_equation_0, values = (var_10183_cast_fp16, var_9849_cast_fp16))[name = tensor<string, []>("op_10277_cast_fp16")];
+            tensor<fp16, []> var_10278_to_fp16 = const()[name = tensor<string, []>("op_10278_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_961_cast_fp16 = mul(x = var_10277_cast_fp16, y = var_10278_to_fp16)[name = tensor<string, []>("aw_chunk_961_cast_fp16")];
+            tensor<string, []> var_10281_equation_0 = const()[name = tensor<string, []>("op_10281_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10281_cast_fp16 = einsum(equation = var_10281_equation_0, values = (var_10183_cast_fp16, var_9856_cast_fp16))[name = tensor<string, []>("op_10281_cast_fp16")];
+            tensor<fp16, []> var_10282_to_fp16 = const()[name = tensor<string, []>("op_10282_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_963_cast_fp16 = mul(x = var_10281_cast_fp16, y = var_10282_to_fp16)[name = tensor<string, []>("aw_chunk_963_cast_fp16")];
+            tensor<string, []> var_10285_equation_0 = const()[name = tensor<string, []>("op_10285_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10285_cast_fp16 = einsum(equation = var_10285_equation_0, values = (var_10183_cast_fp16, var_9863_cast_fp16))[name = tensor<string, []>("op_10285_cast_fp16")];
+            tensor<fp16, []> var_10286_to_fp16 = const()[name = tensor<string, []>("op_10286_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_965_cast_fp16 = mul(x = var_10285_cast_fp16, y = var_10286_to_fp16)[name = tensor<string, []>("aw_chunk_965_cast_fp16")];
+            tensor<string, []> var_10289_equation_0 = const()[name = tensor<string, []>("op_10289_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10289_cast_fp16 = einsum(equation = var_10289_equation_0, values = (var_10183_cast_fp16, var_9870_cast_fp16))[name = tensor<string, []>("op_10289_cast_fp16")];
+            tensor<fp16, []> var_10290_to_fp16 = const()[name = tensor<string, []>("op_10290_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_967_cast_fp16 = mul(x = var_10289_cast_fp16, y = var_10290_to_fp16)[name = tensor<string, []>("aw_chunk_967_cast_fp16")];
+            tensor<string, []> var_10293_equation_0 = const()[name = tensor<string, []>("op_10293_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10293_cast_fp16 = einsum(equation = var_10293_equation_0, values = (var_10187_cast_fp16, var_9877_cast_fp16))[name = tensor<string, []>("op_10293_cast_fp16")];
+            tensor<fp16, []> var_10294_to_fp16 = const()[name = tensor<string, []>("op_10294_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_969_cast_fp16 = mul(x = var_10293_cast_fp16, y = var_10294_to_fp16)[name = tensor<string, []>("aw_chunk_969_cast_fp16")];
+            tensor<string, []> var_10297_equation_0 = const()[name = tensor<string, []>("op_10297_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10297_cast_fp16 = einsum(equation = var_10297_equation_0, values = (var_10187_cast_fp16, var_9884_cast_fp16))[name = tensor<string, []>("op_10297_cast_fp16")];
+            tensor<fp16, []> var_10298_to_fp16 = const()[name = tensor<string, []>("op_10298_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_971_cast_fp16 = mul(x = var_10297_cast_fp16, y = var_10298_to_fp16)[name = tensor<string, []>("aw_chunk_971_cast_fp16")];
+            tensor<string, []> var_10301_equation_0 = const()[name = tensor<string, []>("op_10301_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10301_cast_fp16 = einsum(equation = var_10301_equation_0, values = (var_10187_cast_fp16, var_9891_cast_fp16))[name = tensor<string, []>("op_10301_cast_fp16")];
+            tensor<fp16, []> var_10302_to_fp16 = const()[name = tensor<string, []>("op_10302_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_973_cast_fp16 = mul(x = var_10301_cast_fp16, y = var_10302_to_fp16)[name = tensor<string, []>("aw_chunk_973_cast_fp16")];
+            tensor<string, []> var_10305_equation_0 = const()[name = tensor<string, []>("op_10305_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10305_cast_fp16 = einsum(equation = var_10305_equation_0, values = (var_10187_cast_fp16, var_9898_cast_fp16))[name = tensor<string, []>("op_10305_cast_fp16")];
+            tensor<fp16, []> var_10306_to_fp16 = const()[name = tensor<string, []>("op_10306_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_975_cast_fp16 = mul(x = var_10305_cast_fp16, y = var_10306_to_fp16)[name = tensor<string, []>("aw_chunk_975_cast_fp16")];
+            tensor<string, []> var_10309_equation_0 = const()[name = tensor<string, []>("op_10309_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10309_cast_fp16 = einsum(equation = var_10309_equation_0, values = (var_10191_cast_fp16, var_9905_cast_fp16))[name = tensor<string, []>("op_10309_cast_fp16")];
+            tensor<fp16, []> var_10310_to_fp16 = const()[name = tensor<string, []>("op_10310_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_977_cast_fp16 = mul(x = var_10309_cast_fp16, y = var_10310_to_fp16)[name = tensor<string, []>("aw_chunk_977_cast_fp16")];
+            tensor<string, []> var_10313_equation_0 = const()[name = tensor<string, []>("op_10313_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10313_cast_fp16 = einsum(equation = var_10313_equation_0, values = (var_10191_cast_fp16, var_9912_cast_fp16))[name = tensor<string, []>("op_10313_cast_fp16")];
+            tensor<fp16, []> var_10314_to_fp16 = const()[name = tensor<string, []>("op_10314_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_979_cast_fp16 = mul(x = var_10313_cast_fp16, y = var_10314_to_fp16)[name = tensor<string, []>("aw_chunk_979_cast_fp16")];
+            tensor<string, []> var_10317_equation_0 = const()[name = tensor<string, []>("op_10317_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10317_cast_fp16 = einsum(equation = var_10317_equation_0, values = (var_10191_cast_fp16, var_9919_cast_fp16))[name = tensor<string, []>("op_10317_cast_fp16")];
+            tensor<fp16, []> var_10318_to_fp16 = const()[name = tensor<string, []>("op_10318_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_981_cast_fp16 = mul(x = var_10317_cast_fp16, y = var_10318_to_fp16)[name = tensor<string, []>("aw_chunk_981_cast_fp16")];
+            tensor<string, []> var_10321_equation_0 = const()[name = tensor<string, []>("op_10321_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10321_cast_fp16 = einsum(equation = var_10321_equation_0, values = (var_10191_cast_fp16, var_9926_cast_fp16))[name = tensor<string, []>("op_10321_cast_fp16")];
+            tensor<fp16, []> var_10322_to_fp16 = const()[name = tensor<string, []>("op_10322_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_983_cast_fp16 = mul(x = var_10321_cast_fp16, y = var_10322_to_fp16)[name = tensor<string, []>("aw_chunk_983_cast_fp16")];
+            tensor<string, []> var_10325_equation_0 = const()[name = tensor<string, []>("op_10325_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10325_cast_fp16 = einsum(equation = var_10325_equation_0, values = (var_10195_cast_fp16, var_9933_cast_fp16))[name = tensor<string, []>("op_10325_cast_fp16")];
+            tensor<fp16, []> var_10326_to_fp16 = const()[name = tensor<string, []>("op_10326_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_985_cast_fp16 = mul(x = var_10325_cast_fp16, y = var_10326_to_fp16)[name = tensor<string, []>("aw_chunk_985_cast_fp16")];
+            tensor<string, []> var_10329_equation_0 = const()[name = tensor<string, []>("op_10329_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10329_cast_fp16 = einsum(equation = var_10329_equation_0, values = (var_10195_cast_fp16, var_9940_cast_fp16))[name = tensor<string, []>("op_10329_cast_fp16")];
+            tensor<fp16, []> var_10330_to_fp16 = const()[name = tensor<string, []>("op_10330_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_987_cast_fp16 = mul(x = var_10329_cast_fp16, y = var_10330_to_fp16)[name = tensor<string, []>("aw_chunk_987_cast_fp16")];
+            tensor<string, []> var_10333_equation_0 = const()[name = tensor<string, []>("op_10333_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10333_cast_fp16 = einsum(equation = var_10333_equation_0, values = (var_10195_cast_fp16, var_9947_cast_fp16))[name = tensor<string, []>("op_10333_cast_fp16")];
+            tensor<fp16, []> var_10334_to_fp16 = const()[name = tensor<string, []>("op_10334_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_989_cast_fp16 = mul(x = var_10333_cast_fp16, y = var_10334_to_fp16)[name = tensor<string, []>("aw_chunk_989_cast_fp16")];
+            tensor<string, []> var_10337_equation_0 = const()[name = tensor<string, []>("op_10337_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10337_cast_fp16 = einsum(equation = var_10337_equation_0, values = (var_10195_cast_fp16, var_9954_cast_fp16))[name = tensor<string, []>("op_10337_cast_fp16")];
+            tensor<fp16, []> var_10338_to_fp16 = const()[name = tensor<string, []>("op_10338_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_991_cast_fp16 = mul(x = var_10337_cast_fp16, y = var_10338_to_fp16)[name = tensor<string, []>("aw_chunk_991_cast_fp16")];
+            tensor<string, []> var_10341_equation_0 = const()[name = tensor<string, []>("op_10341_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10341_cast_fp16 = einsum(equation = var_10341_equation_0, values = (var_10199_cast_fp16, var_9961_cast_fp16))[name = tensor<string, []>("op_10341_cast_fp16")];
+            tensor<fp16, []> var_10342_to_fp16 = const()[name = tensor<string, []>("op_10342_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_993_cast_fp16 = mul(x = var_10341_cast_fp16, y = var_10342_to_fp16)[name = tensor<string, []>("aw_chunk_993_cast_fp16")];
+            tensor<string, []> var_10345_equation_0 = const()[name = tensor<string, []>("op_10345_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10345_cast_fp16 = einsum(equation = var_10345_equation_0, values = (var_10199_cast_fp16, var_9968_cast_fp16))[name = tensor<string, []>("op_10345_cast_fp16")];
+            tensor<fp16, []> var_10346_to_fp16 = const()[name = tensor<string, []>("op_10346_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_995_cast_fp16 = mul(x = var_10345_cast_fp16, y = var_10346_to_fp16)[name = tensor<string, []>("aw_chunk_995_cast_fp16")];
+            tensor<string, []> var_10349_equation_0 = const()[name = tensor<string, []>("op_10349_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10349_cast_fp16 = einsum(equation = var_10349_equation_0, values = (var_10199_cast_fp16, var_9975_cast_fp16))[name = tensor<string, []>("op_10349_cast_fp16")];
+            tensor<fp16, []> var_10350_to_fp16 = const()[name = tensor<string, []>("op_10350_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_997_cast_fp16 = mul(x = var_10349_cast_fp16, y = var_10350_to_fp16)[name = tensor<string, []>("aw_chunk_997_cast_fp16")];
+            tensor<string, []> var_10353_equation_0 = const()[name = tensor<string, []>("op_10353_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10353_cast_fp16 = einsum(equation = var_10353_equation_0, values = (var_10199_cast_fp16, var_9982_cast_fp16))[name = tensor<string, []>("op_10353_cast_fp16")];
+            tensor<fp16, []> var_10354_to_fp16 = const()[name = tensor<string, []>("op_10354_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_999_cast_fp16 = mul(x = var_10353_cast_fp16, y = var_10354_to_fp16)[name = tensor<string, []>("aw_chunk_999_cast_fp16")];
+            tensor<string, []> var_10357_equation_0 = const()[name = tensor<string, []>("op_10357_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10357_cast_fp16 = einsum(equation = var_10357_equation_0, values = (var_10203_cast_fp16, var_9989_cast_fp16))[name = tensor<string, []>("op_10357_cast_fp16")];
+            tensor<fp16, []> var_10358_to_fp16 = const()[name = tensor<string, []>("op_10358_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1001_cast_fp16 = mul(x = var_10357_cast_fp16, y = var_10358_to_fp16)[name = tensor<string, []>("aw_chunk_1001_cast_fp16")];
+            tensor<string, []> var_10361_equation_0 = const()[name = tensor<string, []>("op_10361_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10361_cast_fp16 = einsum(equation = var_10361_equation_0, values = (var_10203_cast_fp16, var_9996_cast_fp16))[name = tensor<string, []>("op_10361_cast_fp16")];
+            tensor<fp16, []> var_10362_to_fp16 = const()[name = tensor<string, []>("op_10362_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1003_cast_fp16 = mul(x = var_10361_cast_fp16, y = var_10362_to_fp16)[name = tensor<string, []>("aw_chunk_1003_cast_fp16")];
+            tensor<string, []> var_10365_equation_0 = const()[name = tensor<string, []>("op_10365_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10365_cast_fp16 = einsum(equation = var_10365_equation_0, values = (var_10203_cast_fp16, var_10003_cast_fp16))[name = tensor<string, []>("op_10365_cast_fp16")];
+            tensor<fp16, []> var_10366_to_fp16 = const()[name = tensor<string, []>("op_10366_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1005_cast_fp16 = mul(x = var_10365_cast_fp16, y = var_10366_to_fp16)[name = tensor<string, []>("aw_chunk_1005_cast_fp16")];
+            tensor<string, []> var_10369_equation_0 = const()[name = tensor<string, []>("op_10369_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10369_cast_fp16 = einsum(equation = var_10369_equation_0, values = (var_10203_cast_fp16, var_10010_cast_fp16))[name = tensor<string, []>("op_10369_cast_fp16")];
+            tensor<fp16, []> var_10370_to_fp16 = const()[name = tensor<string, []>("op_10370_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1007_cast_fp16 = mul(x = var_10369_cast_fp16, y = var_10370_to_fp16)[name = tensor<string, []>("aw_chunk_1007_cast_fp16")];
+            tensor<string, []> var_10373_equation_0 = const()[name = tensor<string, []>("op_10373_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10373_cast_fp16 = einsum(equation = var_10373_equation_0, values = (var_10207_cast_fp16, var_10017_cast_fp16))[name = tensor<string, []>("op_10373_cast_fp16")];
+            tensor<fp16, []> var_10374_to_fp16 = const()[name = tensor<string, []>("op_10374_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1009_cast_fp16 = mul(x = var_10373_cast_fp16, y = var_10374_to_fp16)[name = tensor<string, []>("aw_chunk_1009_cast_fp16")];
+            tensor<string, []> var_10377_equation_0 = const()[name = tensor<string, []>("op_10377_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10377_cast_fp16 = einsum(equation = var_10377_equation_0, values = (var_10207_cast_fp16, var_10024_cast_fp16))[name = tensor<string, []>("op_10377_cast_fp16")];
+            tensor<fp16, []> var_10378_to_fp16 = const()[name = tensor<string, []>("op_10378_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1011_cast_fp16 = mul(x = var_10377_cast_fp16, y = var_10378_to_fp16)[name = tensor<string, []>("aw_chunk_1011_cast_fp16")];
+            tensor<string, []> var_10381_equation_0 = const()[name = tensor<string, []>("op_10381_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10381_cast_fp16 = einsum(equation = var_10381_equation_0, values = (var_10207_cast_fp16, var_10031_cast_fp16))[name = tensor<string, []>("op_10381_cast_fp16")];
+            tensor<fp16, []> var_10382_to_fp16 = const()[name = tensor<string, []>("op_10382_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1013_cast_fp16 = mul(x = var_10381_cast_fp16, y = var_10382_to_fp16)[name = tensor<string, []>("aw_chunk_1013_cast_fp16")];
+            tensor<string, []> var_10385_equation_0 = const()[name = tensor<string, []>("op_10385_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10385_cast_fp16 = einsum(equation = var_10385_equation_0, values = (var_10207_cast_fp16, var_10038_cast_fp16))[name = tensor<string, []>("op_10385_cast_fp16")];
+            tensor<fp16, []> var_10386_to_fp16 = const()[name = tensor<string, []>("op_10386_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1015_cast_fp16 = mul(x = var_10385_cast_fp16, y = var_10386_to_fp16)[name = tensor<string, []>("aw_chunk_1015_cast_fp16")];
+            tensor<string, []> var_10389_equation_0 = const()[name = tensor<string, []>("op_10389_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10389_cast_fp16 = einsum(equation = var_10389_equation_0, values = (var_10211_cast_fp16, var_10045_cast_fp16))[name = tensor<string, []>("op_10389_cast_fp16")];
+            tensor<fp16, []> var_10390_to_fp16 = const()[name = tensor<string, []>("op_10390_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1017_cast_fp16 = mul(x = var_10389_cast_fp16, y = var_10390_to_fp16)[name = tensor<string, []>("aw_chunk_1017_cast_fp16")];
+            tensor<string, []> var_10393_equation_0 = const()[name = tensor<string, []>("op_10393_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10393_cast_fp16 = einsum(equation = var_10393_equation_0, values = (var_10211_cast_fp16, var_10052_cast_fp16))[name = tensor<string, []>("op_10393_cast_fp16")];
+            tensor<fp16, []> var_10394_to_fp16 = const()[name = tensor<string, []>("op_10394_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1019_cast_fp16 = mul(x = var_10393_cast_fp16, y = var_10394_to_fp16)[name = tensor<string, []>("aw_chunk_1019_cast_fp16")];
+            tensor<string, []> var_10397_equation_0 = const()[name = tensor<string, []>("op_10397_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10397_cast_fp16 = einsum(equation = var_10397_equation_0, values = (var_10211_cast_fp16, var_10059_cast_fp16))[name = tensor<string, []>("op_10397_cast_fp16")];
+            tensor<fp16, []> var_10398_to_fp16 = const()[name = tensor<string, []>("op_10398_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1021_cast_fp16 = mul(x = var_10397_cast_fp16, y = var_10398_to_fp16)[name = tensor<string, []>("aw_chunk_1021_cast_fp16")];
+            tensor<string, []> var_10401_equation_0 = const()[name = tensor<string, []>("op_10401_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10401_cast_fp16 = einsum(equation = var_10401_equation_0, values = (var_10211_cast_fp16, var_10066_cast_fp16))[name = tensor<string, []>("op_10401_cast_fp16")];
+            tensor<fp16, []> var_10402_to_fp16 = const()[name = tensor<string, []>("op_10402_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1023_cast_fp16 = mul(x = var_10401_cast_fp16, y = var_10402_to_fp16)[name = tensor<string, []>("aw_chunk_1023_cast_fp16")];
+            tensor<string, []> var_10405_equation_0 = const()[name = tensor<string, []>("op_10405_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10405_cast_fp16 = einsum(equation = var_10405_equation_0, values = (var_10215_cast_fp16, var_10073_cast_fp16))[name = tensor<string, []>("op_10405_cast_fp16")];
+            tensor<fp16, []> var_10406_to_fp16 = const()[name = tensor<string, []>("op_10406_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1025_cast_fp16 = mul(x = var_10405_cast_fp16, y = var_10406_to_fp16)[name = tensor<string, []>("aw_chunk_1025_cast_fp16")];
+            tensor<string, []> var_10409_equation_0 = const()[name = tensor<string, []>("op_10409_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10409_cast_fp16 = einsum(equation = var_10409_equation_0, values = (var_10215_cast_fp16, var_10080_cast_fp16))[name = tensor<string, []>("op_10409_cast_fp16")];
+            tensor<fp16, []> var_10410_to_fp16 = const()[name = tensor<string, []>("op_10410_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1027_cast_fp16 = mul(x = var_10409_cast_fp16, y = var_10410_to_fp16)[name = tensor<string, []>("aw_chunk_1027_cast_fp16")];
+            tensor<string, []> var_10413_equation_0 = const()[name = tensor<string, []>("op_10413_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10413_cast_fp16 = einsum(equation = var_10413_equation_0, values = (var_10215_cast_fp16, var_10087_cast_fp16))[name = tensor<string, []>("op_10413_cast_fp16")];
+            tensor<fp16, []> var_10414_to_fp16 = const()[name = tensor<string, []>("op_10414_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1029_cast_fp16 = mul(x = var_10413_cast_fp16, y = var_10414_to_fp16)[name = tensor<string, []>("aw_chunk_1029_cast_fp16")];
+            tensor<string, []> var_10417_equation_0 = const()[name = tensor<string, []>("op_10417_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10417_cast_fp16 = einsum(equation = var_10417_equation_0, values = (var_10215_cast_fp16, var_10094_cast_fp16))[name = tensor<string, []>("op_10417_cast_fp16")];
+            tensor<fp16, []> var_10418_to_fp16 = const()[name = tensor<string, []>("op_10418_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1031_cast_fp16 = mul(x = var_10417_cast_fp16, y = var_10418_to_fp16)[name = tensor<string, []>("aw_chunk_1031_cast_fp16")];
+            tensor<string, []> var_10421_equation_0 = const()[name = tensor<string, []>("op_10421_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10421_cast_fp16 = einsum(equation = var_10421_equation_0, values = (var_10219_cast_fp16, var_10101_cast_fp16))[name = tensor<string, []>("op_10421_cast_fp16")];
+            tensor<fp16, []> var_10422_to_fp16 = const()[name = tensor<string, []>("op_10422_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1033_cast_fp16 = mul(x = var_10421_cast_fp16, y = var_10422_to_fp16)[name = tensor<string, []>("aw_chunk_1033_cast_fp16")];
+            tensor<string, []> var_10425_equation_0 = const()[name = tensor<string, []>("op_10425_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10425_cast_fp16 = einsum(equation = var_10425_equation_0, values = (var_10219_cast_fp16, var_10108_cast_fp16))[name = tensor<string, []>("op_10425_cast_fp16")];
+            tensor<fp16, []> var_10426_to_fp16 = const()[name = tensor<string, []>("op_10426_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1035_cast_fp16 = mul(x = var_10425_cast_fp16, y = var_10426_to_fp16)[name = tensor<string, []>("aw_chunk_1035_cast_fp16")];
+            tensor<string, []> var_10429_equation_0 = const()[name = tensor<string, []>("op_10429_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10429_cast_fp16 = einsum(equation = var_10429_equation_0, values = (var_10219_cast_fp16, var_10115_cast_fp16))[name = tensor<string, []>("op_10429_cast_fp16")];
+            tensor<fp16, []> var_10430_to_fp16 = const()[name = tensor<string, []>("op_10430_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1037_cast_fp16 = mul(x = var_10429_cast_fp16, y = var_10430_to_fp16)[name = tensor<string, []>("aw_chunk_1037_cast_fp16")];
+            tensor<string, []> var_10433_equation_0 = const()[name = tensor<string, []>("op_10433_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10433_cast_fp16 = einsum(equation = var_10433_equation_0, values = (var_10219_cast_fp16, var_10122_cast_fp16))[name = tensor<string, []>("op_10433_cast_fp16")];
+            tensor<fp16, []> var_10434_to_fp16 = const()[name = tensor<string, []>("op_10434_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1039_cast_fp16 = mul(x = var_10433_cast_fp16, y = var_10434_to_fp16)[name = tensor<string, []>("aw_chunk_1039_cast_fp16")];
+            tensor<string, []> var_10437_equation_0 = const()[name = tensor<string, []>("op_10437_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10437_cast_fp16 = einsum(equation = var_10437_equation_0, values = (var_10223_cast_fp16, var_10129_cast_fp16))[name = tensor<string, []>("op_10437_cast_fp16")];
+            tensor<fp16, []> var_10438_to_fp16 = const()[name = tensor<string, []>("op_10438_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1041_cast_fp16 = mul(x = var_10437_cast_fp16, y = var_10438_to_fp16)[name = tensor<string, []>("aw_chunk_1041_cast_fp16")];
+            tensor<string, []> var_10441_equation_0 = const()[name = tensor<string, []>("op_10441_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10441_cast_fp16 = einsum(equation = var_10441_equation_0, values = (var_10223_cast_fp16, var_10136_cast_fp16))[name = tensor<string, []>("op_10441_cast_fp16")];
+            tensor<fp16, []> var_10442_to_fp16 = const()[name = tensor<string, []>("op_10442_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1043_cast_fp16 = mul(x = var_10441_cast_fp16, y = var_10442_to_fp16)[name = tensor<string, []>("aw_chunk_1043_cast_fp16")];
+            tensor<string, []> var_10445_equation_0 = const()[name = tensor<string, []>("op_10445_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10445_cast_fp16 = einsum(equation = var_10445_equation_0, values = (var_10223_cast_fp16, var_10143_cast_fp16))[name = tensor<string, []>("op_10445_cast_fp16")];
+            tensor<fp16, []> var_10446_to_fp16 = const()[name = tensor<string, []>("op_10446_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1045_cast_fp16 = mul(x = var_10445_cast_fp16, y = var_10446_to_fp16)[name = tensor<string, []>("aw_chunk_1045_cast_fp16")];
+            tensor<string, []> var_10449_equation_0 = const()[name = tensor<string, []>("op_10449_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10449_cast_fp16 = einsum(equation = var_10449_equation_0, values = (var_10223_cast_fp16, var_10150_cast_fp16))[name = tensor<string, []>("op_10449_cast_fp16")];
+            tensor<fp16, []> var_10450_to_fp16 = const()[name = tensor<string, []>("op_10450_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1047_cast_fp16 = mul(x = var_10449_cast_fp16, y = var_10450_to_fp16)[name = tensor<string, []>("aw_chunk_1047_cast_fp16")];
+            tensor<string, []> var_10453_equation_0 = const()[name = tensor<string, []>("op_10453_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10453_cast_fp16 = einsum(equation = var_10453_equation_0, values = (var_10227_cast_fp16, var_10157_cast_fp16))[name = tensor<string, []>("op_10453_cast_fp16")];
+            tensor<fp16, []> var_10454_to_fp16 = const()[name = tensor<string, []>("op_10454_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1049_cast_fp16 = mul(x = var_10453_cast_fp16, y = var_10454_to_fp16)[name = tensor<string, []>("aw_chunk_1049_cast_fp16")];
+            tensor<string, []> var_10457_equation_0 = const()[name = tensor<string, []>("op_10457_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10457_cast_fp16 = einsum(equation = var_10457_equation_0, values = (var_10227_cast_fp16, var_10164_cast_fp16))[name = tensor<string, []>("op_10457_cast_fp16")];
+            tensor<fp16, []> var_10458_to_fp16 = const()[name = tensor<string, []>("op_10458_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1051_cast_fp16 = mul(x = var_10457_cast_fp16, y = var_10458_to_fp16)[name = tensor<string, []>("aw_chunk_1051_cast_fp16")];
+            tensor<string, []> var_10461_equation_0 = const()[name = tensor<string, []>("op_10461_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10461_cast_fp16 = einsum(equation = var_10461_equation_0, values = (var_10227_cast_fp16, var_10171_cast_fp16))[name = tensor<string, []>("op_10461_cast_fp16")];
+            tensor<fp16, []> var_10462_to_fp16 = const()[name = tensor<string, []>("op_10462_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1053_cast_fp16 = mul(x = var_10461_cast_fp16, y = var_10462_to_fp16)[name = tensor<string, []>("aw_chunk_1053_cast_fp16")];
+            tensor<string, []> var_10465_equation_0 = const()[name = tensor<string, []>("op_10465_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10465_cast_fp16 = einsum(equation = var_10465_equation_0, values = (var_10227_cast_fp16, var_10178_cast_fp16))[name = tensor<string, []>("op_10465_cast_fp16")];
+            tensor<fp16, []> var_10466_to_fp16 = const()[name = tensor<string, []>("op_10466_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1055_cast_fp16 = mul(x = var_10465_cast_fp16, y = var_10466_to_fp16)[name = tensor<string, []>("aw_chunk_1055_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10468_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_961_cast_fp16)[name = tensor<string, []>("op_10468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10469_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_963_cast_fp16)[name = tensor<string, []>("op_10469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10470_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_965_cast_fp16)[name = tensor<string, []>("op_10470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10471_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_967_cast_fp16)[name = tensor<string, []>("op_10471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10472_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_969_cast_fp16)[name = tensor<string, []>("op_10472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10473_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_971_cast_fp16)[name = tensor<string, []>("op_10473_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10474_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_973_cast_fp16)[name = tensor<string, []>("op_10474_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10475_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_975_cast_fp16)[name = tensor<string, []>("op_10475_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10476_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_977_cast_fp16)[name = tensor<string, []>("op_10476_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10477_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_979_cast_fp16)[name = tensor<string, []>("op_10477_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10478_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_981_cast_fp16)[name = tensor<string, []>("op_10478_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10479_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_983_cast_fp16)[name = tensor<string, []>("op_10479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10480_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_985_cast_fp16)[name = tensor<string, []>("op_10480_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10481_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_987_cast_fp16)[name = tensor<string, []>("op_10481_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10482_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_989_cast_fp16)[name = tensor<string, []>("op_10482_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10483_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_991_cast_fp16)[name = tensor<string, []>("op_10483_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10484_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_993_cast_fp16)[name = tensor<string, []>("op_10484_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10485_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_995_cast_fp16)[name = tensor<string, []>("op_10485_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10486_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_997_cast_fp16)[name = tensor<string, []>("op_10486_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10487_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_999_cast_fp16)[name = tensor<string, []>("op_10487_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10488_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1001_cast_fp16)[name = tensor<string, []>("op_10488_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10489_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1003_cast_fp16)[name = tensor<string, []>("op_10489_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10490_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1005_cast_fp16)[name = tensor<string, []>("op_10490_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10491_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1007_cast_fp16)[name = tensor<string, []>("op_10491_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10492_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1009_cast_fp16)[name = tensor<string, []>("op_10492_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10493_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1011_cast_fp16)[name = tensor<string, []>("op_10493_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10494_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1013_cast_fp16)[name = tensor<string, []>("op_10494_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10495_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1015_cast_fp16)[name = tensor<string, []>("op_10495_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10496_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1017_cast_fp16)[name = tensor<string, []>("op_10496_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10497_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1019_cast_fp16)[name = tensor<string, []>("op_10497_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10498_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1021_cast_fp16)[name = tensor<string, []>("op_10498_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10499_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1023_cast_fp16)[name = tensor<string, []>("op_10499_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10500_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1025_cast_fp16)[name = tensor<string, []>("op_10500_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10501_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1027_cast_fp16)[name = tensor<string, []>("op_10501_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10502_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1029_cast_fp16)[name = tensor<string, []>("op_10502_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10503_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1031_cast_fp16)[name = tensor<string, []>("op_10503_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10504_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1033_cast_fp16)[name = tensor<string, []>("op_10504_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10505_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1035_cast_fp16)[name = tensor<string, []>("op_10505_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10506_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1037_cast_fp16)[name = tensor<string, []>("op_10506_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10507_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1039_cast_fp16)[name = tensor<string, []>("op_10507_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10508_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1041_cast_fp16)[name = tensor<string, []>("op_10508_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10509_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1043_cast_fp16)[name = tensor<string, []>("op_10509_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10510_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1045_cast_fp16)[name = tensor<string, []>("op_10510_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10511_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1047_cast_fp16)[name = tensor<string, []>("op_10511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10512_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1049_cast_fp16)[name = tensor<string, []>("op_10512_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10513_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1051_cast_fp16)[name = tensor<string, []>("op_10513_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10514_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1053_cast_fp16)[name = tensor<string, []>("op_10514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_10515_cast_fp16 = softmax(axis = var_9741, x = aw_chunk_1055_cast_fp16)[name = tensor<string, []>("op_10515_cast_fp16")];
+            tensor<string, []> var_10517_equation_0 = const()[name = tensor<string, []>("op_10517_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10517_cast_fp16 = einsum(equation = var_10517_equation_0, values = (var_10229_cast_fp16, var_10468_cast_fp16))[name = tensor<string, []>("op_10517_cast_fp16")];
+            tensor<string, []> var_10519_equation_0 = const()[name = tensor<string, []>("op_10519_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10519_cast_fp16 = einsum(equation = var_10519_equation_0, values = (var_10229_cast_fp16, var_10469_cast_fp16))[name = tensor<string, []>("op_10519_cast_fp16")];
+            tensor<string, []> var_10521_equation_0 = const()[name = tensor<string, []>("op_10521_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10521_cast_fp16 = einsum(equation = var_10521_equation_0, values = (var_10229_cast_fp16, var_10470_cast_fp16))[name = tensor<string, []>("op_10521_cast_fp16")];
+            tensor<string, []> var_10523_equation_0 = const()[name = tensor<string, []>("op_10523_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10523_cast_fp16 = einsum(equation = var_10523_equation_0, values = (var_10229_cast_fp16, var_10471_cast_fp16))[name = tensor<string, []>("op_10523_cast_fp16")];
+            tensor<string, []> var_10525_equation_0 = const()[name = tensor<string, []>("op_10525_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10525_cast_fp16 = einsum(equation = var_10525_equation_0, values = (var_10233_cast_fp16, var_10472_cast_fp16))[name = tensor<string, []>("op_10525_cast_fp16")];
+            tensor<string, []> var_10527_equation_0 = const()[name = tensor<string, []>("op_10527_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10527_cast_fp16 = einsum(equation = var_10527_equation_0, values = (var_10233_cast_fp16, var_10473_cast_fp16))[name = tensor<string, []>("op_10527_cast_fp16")];
+            tensor<string, []> var_10529_equation_0 = const()[name = tensor<string, []>("op_10529_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10529_cast_fp16 = einsum(equation = var_10529_equation_0, values = (var_10233_cast_fp16, var_10474_cast_fp16))[name = tensor<string, []>("op_10529_cast_fp16")];
+            tensor<string, []> var_10531_equation_0 = const()[name = tensor<string, []>("op_10531_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10531_cast_fp16 = einsum(equation = var_10531_equation_0, values = (var_10233_cast_fp16, var_10475_cast_fp16))[name = tensor<string, []>("op_10531_cast_fp16")];
+            tensor<string, []> var_10533_equation_0 = const()[name = tensor<string, []>("op_10533_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10533_cast_fp16 = einsum(equation = var_10533_equation_0, values = (var_10237_cast_fp16, var_10476_cast_fp16))[name = tensor<string, []>("op_10533_cast_fp16")];
+            tensor<string, []> var_10535_equation_0 = const()[name = tensor<string, []>("op_10535_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10535_cast_fp16 = einsum(equation = var_10535_equation_0, values = (var_10237_cast_fp16, var_10477_cast_fp16))[name = tensor<string, []>("op_10535_cast_fp16")];
+            tensor<string, []> var_10537_equation_0 = const()[name = tensor<string, []>("op_10537_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10537_cast_fp16 = einsum(equation = var_10537_equation_0, values = (var_10237_cast_fp16, var_10478_cast_fp16))[name = tensor<string, []>("op_10537_cast_fp16")];
+            tensor<string, []> var_10539_equation_0 = const()[name = tensor<string, []>("op_10539_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10539_cast_fp16 = einsum(equation = var_10539_equation_0, values = (var_10237_cast_fp16, var_10479_cast_fp16))[name = tensor<string, []>("op_10539_cast_fp16")];
+            tensor<string, []> var_10541_equation_0 = const()[name = tensor<string, []>("op_10541_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10541_cast_fp16 = einsum(equation = var_10541_equation_0, values = (var_10241_cast_fp16, var_10480_cast_fp16))[name = tensor<string, []>("op_10541_cast_fp16")];
+            tensor<string, []> var_10543_equation_0 = const()[name = tensor<string, []>("op_10543_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10543_cast_fp16 = einsum(equation = var_10543_equation_0, values = (var_10241_cast_fp16, var_10481_cast_fp16))[name = tensor<string, []>("op_10543_cast_fp16")];
+            tensor<string, []> var_10545_equation_0 = const()[name = tensor<string, []>("op_10545_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10545_cast_fp16 = einsum(equation = var_10545_equation_0, values = (var_10241_cast_fp16, var_10482_cast_fp16))[name = tensor<string, []>("op_10545_cast_fp16")];
+            tensor<string, []> var_10547_equation_0 = const()[name = tensor<string, []>("op_10547_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10547_cast_fp16 = einsum(equation = var_10547_equation_0, values = (var_10241_cast_fp16, var_10483_cast_fp16))[name = tensor<string, []>("op_10547_cast_fp16")];
+            tensor<string, []> var_10549_equation_0 = const()[name = tensor<string, []>("op_10549_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10549_cast_fp16 = einsum(equation = var_10549_equation_0, values = (var_10245_cast_fp16, var_10484_cast_fp16))[name = tensor<string, []>("op_10549_cast_fp16")];
+            tensor<string, []> var_10551_equation_0 = const()[name = tensor<string, []>("op_10551_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10551_cast_fp16 = einsum(equation = var_10551_equation_0, values = (var_10245_cast_fp16, var_10485_cast_fp16))[name = tensor<string, []>("op_10551_cast_fp16")];
+            tensor<string, []> var_10553_equation_0 = const()[name = tensor<string, []>("op_10553_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10553_cast_fp16 = einsum(equation = var_10553_equation_0, values = (var_10245_cast_fp16, var_10486_cast_fp16))[name = tensor<string, []>("op_10553_cast_fp16")];
+            tensor<string, []> var_10555_equation_0 = const()[name = tensor<string, []>("op_10555_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10555_cast_fp16 = einsum(equation = var_10555_equation_0, values = (var_10245_cast_fp16, var_10487_cast_fp16))[name = tensor<string, []>("op_10555_cast_fp16")];
+            tensor<string, []> var_10557_equation_0 = const()[name = tensor<string, []>("op_10557_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10557_cast_fp16 = einsum(equation = var_10557_equation_0, values = (var_10249_cast_fp16, var_10488_cast_fp16))[name = tensor<string, []>("op_10557_cast_fp16")];
+            tensor<string, []> var_10559_equation_0 = const()[name = tensor<string, []>("op_10559_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10559_cast_fp16 = einsum(equation = var_10559_equation_0, values = (var_10249_cast_fp16, var_10489_cast_fp16))[name = tensor<string, []>("op_10559_cast_fp16")];
+            tensor<string, []> var_10561_equation_0 = const()[name = tensor<string, []>("op_10561_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10561_cast_fp16 = einsum(equation = var_10561_equation_0, values = (var_10249_cast_fp16, var_10490_cast_fp16))[name = tensor<string, []>("op_10561_cast_fp16")];
+            tensor<string, []> var_10563_equation_0 = const()[name = tensor<string, []>("op_10563_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10563_cast_fp16 = einsum(equation = var_10563_equation_0, values = (var_10249_cast_fp16, var_10491_cast_fp16))[name = tensor<string, []>("op_10563_cast_fp16")];
+            tensor<string, []> var_10565_equation_0 = const()[name = tensor<string, []>("op_10565_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10565_cast_fp16 = einsum(equation = var_10565_equation_0, values = (var_10253_cast_fp16, var_10492_cast_fp16))[name = tensor<string, []>("op_10565_cast_fp16")];
+            tensor<string, []> var_10567_equation_0 = const()[name = tensor<string, []>("op_10567_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10567_cast_fp16 = einsum(equation = var_10567_equation_0, values = (var_10253_cast_fp16, var_10493_cast_fp16))[name = tensor<string, []>("op_10567_cast_fp16")];
+            tensor<string, []> var_10569_equation_0 = const()[name = tensor<string, []>("op_10569_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10569_cast_fp16 = einsum(equation = var_10569_equation_0, values = (var_10253_cast_fp16, var_10494_cast_fp16))[name = tensor<string, []>("op_10569_cast_fp16")];
+            tensor<string, []> var_10571_equation_0 = const()[name = tensor<string, []>("op_10571_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10571_cast_fp16 = einsum(equation = var_10571_equation_0, values = (var_10253_cast_fp16, var_10495_cast_fp16))[name = tensor<string, []>("op_10571_cast_fp16")];
+            tensor<string, []> var_10573_equation_0 = const()[name = tensor<string, []>("op_10573_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10573_cast_fp16 = einsum(equation = var_10573_equation_0, values = (var_10257_cast_fp16, var_10496_cast_fp16))[name = tensor<string, []>("op_10573_cast_fp16")];
+            tensor<string, []> var_10575_equation_0 = const()[name = tensor<string, []>("op_10575_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10575_cast_fp16 = einsum(equation = var_10575_equation_0, values = (var_10257_cast_fp16, var_10497_cast_fp16))[name = tensor<string, []>("op_10575_cast_fp16")];
+            tensor<string, []> var_10577_equation_0 = const()[name = tensor<string, []>("op_10577_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10577_cast_fp16 = einsum(equation = var_10577_equation_0, values = (var_10257_cast_fp16, var_10498_cast_fp16))[name = tensor<string, []>("op_10577_cast_fp16")];
+            tensor<string, []> var_10579_equation_0 = const()[name = tensor<string, []>("op_10579_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10579_cast_fp16 = einsum(equation = var_10579_equation_0, values = (var_10257_cast_fp16, var_10499_cast_fp16))[name = tensor<string, []>("op_10579_cast_fp16")];
+            tensor<string, []> var_10581_equation_0 = const()[name = tensor<string, []>("op_10581_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10581_cast_fp16 = einsum(equation = var_10581_equation_0, values = (var_10261_cast_fp16, var_10500_cast_fp16))[name = tensor<string, []>("op_10581_cast_fp16")];
+            tensor<string, []> var_10583_equation_0 = const()[name = tensor<string, []>("op_10583_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10583_cast_fp16 = einsum(equation = var_10583_equation_0, values = (var_10261_cast_fp16, var_10501_cast_fp16))[name = tensor<string, []>("op_10583_cast_fp16")];
+            tensor<string, []> var_10585_equation_0 = const()[name = tensor<string, []>("op_10585_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10585_cast_fp16 = einsum(equation = var_10585_equation_0, values = (var_10261_cast_fp16, var_10502_cast_fp16))[name = tensor<string, []>("op_10585_cast_fp16")];
+            tensor<string, []> var_10587_equation_0 = const()[name = tensor<string, []>("op_10587_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10587_cast_fp16 = einsum(equation = var_10587_equation_0, values = (var_10261_cast_fp16, var_10503_cast_fp16))[name = tensor<string, []>("op_10587_cast_fp16")];
+            tensor<string, []> var_10589_equation_0 = const()[name = tensor<string, []>("op_10589_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10589_cast_fp16 = einsum(equation = var_10589_equation_0, values = (var_10265_cast_fp16, var_10504_cast_fp16))[name = tensor<string, []>("op_10589_cast_fp16")];
+            tensor<string, []> var_10591_equation_0 = const()[name = tensor<string, []>("op_10591_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10591_cast_fp16 = einsum(equation = var_10591_equation_0, values = (var_10265_cast_fp16, var_10505_cast_fp16))[name = tensor<string, []>("op_10591_cast_fp16")];
+            tensor<string, []> var_10593_equation_0 = const()[name = tensor<string, []>("op_10593_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10593_cast_fp16 = einsum(equation = var_10593_equation_0, values = (var_10265_cast_fp16, var_10506_cast_fp16))[name = tensor<string, []>("op_10593_cast_fp16")];
+            tensor<string, []> var_10595_equation_0 = const()[name = tensor<string, []>("op_10595_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10595_cast_fp16 = einsum(equation = var_10595_equation_0, values = (var_10265_cast_fp16, var_10507_cast_fp16))[name = tensor<string, []>("op_10595_cast_fp16")];
+            tensor<string, []> var_10597_equation_0 = const()[name = tensor<string, []>("op_10597_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10597_cast_fp16 = einsum(equation = var_10597_equation_0, values = (var_10269_cast_fp16, var_10508_cast_fp16))[name = tensor<string, []>("op_10597_cast_fp16")];
+            tensor<string, []> var_10599_equation_0 = const()[name = tensor<string, []>("op_10599_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10599_cast_fp16 = einsum(equation = var_10599_equation_0, values = (var_10269_cast_fp16, var_10509_cast_fp16))[name = tensor<string, []>("op_10599_cast_fp16")];
+            tensor<string, []> var_10601_equation_0 = const()[name = tensor<string, []>("op_10601_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10601_cast_fp16 = einsum(equation = var_10601_equation_0, values = (var_10269_cast_fp16, var_10510_cast_fp16))[name = tensor<string, []>("op_10601_cast_fp16")];
+            tensor<string, []> var_10603_equation_0 = const()[name = tensor<string, []>("op_10603_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10603_cast_fp16 = einsum(equation = var_10603_equation_0, values = (var_10269_cast_fp16, var_10511_cast_fp16))[name = tensor<string, []>("op_10603_cast_fp16")];
+            tensor<string, []> var_10605_equation_0 = const()[name = tensor<string, []>("op_10605_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10605_cast_fp16 = einsum(equation = var_10605_equation_0, values = (var_10273_cast_fp16, var_10512_cast_fp16))[name = tensor<string, []>("op_10605_cast_fp16")];
+            tensor<string, []> var_10607_equation_0 = const()[name = tensor<string, []>("op_10607_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10607_cast_fp16 = einsum(equation = var_10607_equation_0, values = (var_10273_cast_fp16, var_10513_cast_fp16))[name = tensor<string, []>("op_10607_cast_fp16")];
+            tensor<string, []> var_10609_equation_0 = const()[name = tensor<string, []>("op_10609_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10609_cast_fp16 = einsum(equation = var_10609_equation_0, values = (var_10273_cast_fp16, var_10514_cast_fp16))[name = tensor<string, []>("op_10609_cast_fp16")];
+            tensor<string, []> var_10611_equation_0 = const()[name = tensor<string, []>("op_10611_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_10611_cast_fp16 = einsum(equation = var_10611_equation_0, values = (var_10273_cast_fp16, var_10515_cast_fp16))[name = tensor<string, []>("op_10611_cast_fp16")];
+            tensor<bool, []> var_10613_interleave_0 = const()[name = tensor<string, []>("op_10613_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10613_cast_fp16 = concat(axis = var_9724, interleave = var_10613_interleave_0, values = (var_10517_cast_fp16, var_10519_cast_fp16, var_10521_cast_fp16, var_10523_cast_fp16))[name = tensor<string, []>("op_10613_cast_fp16")];
+            tensor<bool, []> var_10615_interleave_0 = const()[name = tensor<string, []>("op_10615_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10615_cast_fp16 = concat(axis = var_9724, interleave = var_10615_interleave_0, values = (var_10525_cast_fp16, var_10527_cast_fp16, var_10529_cast_fp16, var_10531_cast_fp16))[name = tensor<string, []>("op_10615_cast_fp16")];
+            tensor<bool, []> var_10617_interleave_0 = const()[name = tensor<string, []>("op_10617_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10617_cast_fp16 = concat(axis = var_9724, interleave = var_10617_interleave_0, values = (var_10533_cast_fp16, var_10535_cast_fp16, var_10537_cast_fp16, var_10539_cast_fp16))[name = tensor<string, []>("op_10617_cast_fp16")];
+            tensor<bool, []> var_10619_interleave_0 = const()[name = tensor<string, []>("op_10619_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10619_cast_fp16 = concat(axis = var_9724, interleave = var_10619_interleave_0, values = (var_10541_cast_fp16, var_10543_cast_fp16, var_10545_cast_fp16, var_10547_cast_fp16))[name = tensor<string, []>("op_10619_cast_fp16")];
+            tensor<bool, []> var_10621_interleave_0 = const()[name = tensor<string, []>("op_10621_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10621_cast_fp16 = concat(axis = var_9724, interleave = var_10621_interleave_0, values = (var_10549_cast_fp16, var_10551_cast_fp16, var_10553_cast_fp16, var_10555_cast_fp16))[name = tensor<string, []>("op_10621_cast_fp16")];
+            tensor<bool, []> var_10623_interleave_0 = const()[name = tensor<string, []>("op_10623_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10623_cast_fp16 = concat(axis = var_9724, interleave = var_10623_interleave_0, values = (var_10557_cast_fp16, var_10559_cast_fp16, var_10561_cast_fp16, var_10563_cast_fp16))[name = tensor<string, []>("op_10623_cast_fp16")];
+            tensor<bool, []> var_10625_interleave_0 = const()[name = tensor<string, []>("op_10625_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10625_cast_fp16 = concat(axis = var_9724, interleave = var_10625_interleave_0, values = (var_10565_cast_fp16, var_10567_cast_fp16, var_10569_cast_fp16, var_10571_cast_fp16))[name = tensor<string, []>("op_10625_cast_fp16")];
+            tensor<bool, []> var_10627_interleave_0 = const()[name = tensor<string, []>("op_10627_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10627_cast_fp16 = concat(axis = var_9724, interleave = var_10627_interleave_0, values = (var_10573_cast_fp16, var_10575_cast_fp16, var_10577_cast_fp16, var_10579_cast_fp16))[name = tensor<string, []>("op_10627_cast_fp16")];
+            tensor<bool, []> var_10629_interleave_0 = const()[name = tensor<string, []>("op_10629_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10629_cast_fp16 = concat(axis = var_9724, interleave = var_10629_interleave_0, values = (var_10581_cast_fp16, var_10583_cast_fp16, var_10585_cast_fp16, var_10587_cast_fp16))[name = tensor<string, []>("op_10629_cast_fp16")];
+            tensor<bool, []> var_10631_interleave_0 = const()[name = tensor<string, []>("op_10631_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10631_cast_fp16 = concat(axis = var_9724, interleave = var_10631_interleave_0, values = (var_10589_cast_fp16, var_10591_cast_fp16, var_10593_cast_fp16, var_10595_cast_fp16))[name = tensor<string, []>("op_10631_cast_fp16")];
+            tensor<bool, []> var_10633_interleave_0 = const()[name = tensor<string, []>("op_10633_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10633_cast_fp16 = concat(axis = var_9724, interleave = var_10633_interleave_0, values = (var_10597_cast_fp16, var_10599_cast_fp16, var_10601_cast_fp16, var_10603_cast_fp16))[name = tensor<string, []>("op_10633_cast_fp16")];
+            tensor<bool, []> var_10635_interleave_0 = const()[name = tensor<string, []>("op_10635_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_10635_cast_fp16 = concat(axis = var_9724, interleave = var_10635_interleave_0, values = (var_10605_cast_fp16, var_10607_cast_fp16, var_10609_cast_fp16, var_10611_cast_fp16))[name = tensor<string, []>("op_10635_cast_fp16")];
+            tensor<bool, []> input_81_interleave_0 = const()[name = tensor<string, []>("input_81_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_81_cast_fp16 = concat(axis = var_9741, interleave = input_81_interleave_0, values = (var_10613_cast_fp16, var_10615_cast_fp16, var_10617_cast_fp16, var_10619_cast_fp16, var_10621_cast_fp16, var_10623_cast_fp16, var_10625_cast_fp16, var_10627_cast_fp16, var_10629_cast_fp16, var_10631_cast_fp16, var_10633_cast_fp16, var_10635_cast_fp16))[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<int32, [2]> var_10640 = const()[name = tensor<string, []>("op_10640"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_10642 = const()[name = tensor<string, []>("op_10642"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_43_pad_type_0 = const()[name = tensor<string, []>("obj_43_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_43_pad_0 = const()[name = tensor<string, []>("obj_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151515456)))];
+            tensor<fp16, [768]> layers_10_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152695168)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_43_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_bias_to_fp16, dilations = var_10642, groups = var_9741, pad = obj_43_pad_0, pad_type = obj_43_pad_type_0, strides = var_10640, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("obj_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, [1]> var_10648 = const()[name = tensor<string, []>("op_10648"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_43_cast_fp16 = reduce_mean(axes = var_10648, keep_dims = var_9742, x = inputs_43_cast_fp16)[name = tensor<string, []>("channels_mean_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_43_cast_fp16 = sub(x = inputs_43_cast_fp16, y = channels_mean_43_cast_fp16)[name = tensor<string, []>("zero_mean_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = zero_mean_43_cast_fp16)[name = tensor<string, []>("zero_mean_sq_43_cast_fp16")];
+            tensor<int32, [1]> var_10652 = const()[name = tensor<string, []>("op_10652"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_10653_cast_fp16 = reduce_mean(axes = var_10652, keep_dims = var_9742, x = zero_mean_sq_43_cast_fp16)[name = tensor<string, []>("op_10653_cast_fp16")];
+            tensor<fp16, []> var_10654_to_fp16 = const()[name = tensor<string, []>("op_10654_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_10655_cast_fp16 = add(x = var_10653_cast_fp16, y = var_10654_to_fp16)[name = tensor<string, []>("op_10655_cast_fp16")];
+            tensor<fp16, []> denom_43_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_43_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0_to_fp16, x = var_10655_cast_fp16)[name = tensor<string, []>("denom_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = denom_43_cast_fp16)[name = tensor<string, []>("out_43_cast_fp16")];
+            tensor<fp16, [768]> input_83_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_83_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152696768)))];
+            tensor<fp16, [768]> input_83_beta_0_to_fp16 = const()[name = tensor<string, []>("input_83_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152698368)))];
+            tensor<fp16, []> input_83_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_83_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<int32, [2]> var_10666 = const()[name = tensor<string, []>("op_10666"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_10668 = const()[name = tensor<string, []>("op_10668"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_85_pad_type_0 = const()[name = tensor<string, []>("input_85_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_85_pad_0 = const()[name = tensor<string, []>("input_85_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152699968)))];
+            tensor<fp16, [3072]> layers_10_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157418624)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_85_cast_fp16 = conv(bias = layers_10_fc1_bias_to_fp16, dilations = var_10668, groups = var_9741, pad = input_85_pad_0, pad_type = input_85_pad_type_0, strides = var_10666, weight = layers_10_fc1_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<string, []> input_87_mode_0 = const()[name = tensor<string, []>("input_87_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<int32, [2]> var_10674 = const()[name = tensor<string, []>("op_10674"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_10676 = const()[name = tensor<string, []>("op_10676"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_25_pad_type_0 = const()[name = tensor<string, []>("hidden_states_25_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_25_pad_0 = const()[name = tensor<string, []>("hidden_states_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157424832)))];
+            tensor<fp16, [768]> layers_10_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162143488)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_25_cast_fp16 = conv(bias = layers_10_fc2_bias_to_fp16, dilations = var_10676, groups = var_9741, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = var_10674, weight = layers_10_fc2_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("hidden_states_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, []> var_10683 = const()[name = tensor<string, []>("op_10683"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_10700 = const()[name = tensor<string, []>("op_10700"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_10701 = const()[name = tensor<string, []>("op_10701"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_10711 = const()[name = tensor<string, []>("op_10711"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_45_cast_fp16 = reduce_mean(axes = var_10711, keep_dims = var_10701, x = inputs_45_cast_fp16)[name = tensor<string, []>("channels_mean_45_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_45_cast_fp16 = sub(x = inputs_45_cast_fp16, y = channels_mean_45_cast_fp16)[name = tensor<string, []>("zero_mean_45_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = zero_mean_45_cast_fp16)[name = tensor<string, []>("zero_mean_sq_45_cast_fp16")];
+            tensor<int32, [1]> var_10715 = const()[name = tensor<string, []>("op_10715"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_10716_cast_fp16 = reduce_mean(axes = var_10715, keep_dims = var_10701, x = zero_mean_sq_45_cast_fp16)[name = tensor<string, []>("op_10716_cast_fp16")];
+            tensor<fp16, []> var_10717_to_fp16 = const()[name = tensor<string, []>("op_10717_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_10718_cast_fp16 = add(x = var_10716_cast_fp16, y = var_10717_to_fp16)[name = tensor<string, []>("op_10718_cast_fp16")];
+            tensor<fp16, []> denom_45_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_45_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0_to_fp16, x = var_10718_cast_fp16)[name = tensor<string, []>("denom_45_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = denom_45_cast_fp16)[name = tensor<string, []>("out_45_cast_fp16")];
+            tensor<fp16, [768]> obj_45_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_45_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162145088)))];
+            tensor<fp16, [768]> obj_45_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_45_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162146688)))];
+            tensor<fp16, []> obj_45_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_45_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor<string, []>("obj_45_cast_fp16")];
+            tensor<int32, [2]> var_10733 = const()[name = tensor<string, []>("op_10733"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_10735 = const()[name = tensor<string, []>("op_10735"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162148288)))];
+            tensor<fp16, [768]> layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163328000)))];
+            tensor<fp16, [1, 768, 1, 1500]> query_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_bias_to_fp16, dilations = var_10735, groups = var_10700, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_10733, weight = layers_11_self_attn_q_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
+            tensor<int32, [2]> var_10739 = const()[name = tensor<string, []>("op_10739"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_10741 = const()[name = tensor<string, []>("op_10741"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_pad_0 = const()[name = tensor<string, []>("key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163329600)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_cast_fp16 = conv(dilations = var_10741, groups = var_10700, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_10739, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor<string, []>("key_cast_fp16")];
+            tensor<int32, [2]> var_10746 = const()[name = tensor<string, []>("op_10746"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_10748 = const()[name = tensor<string, []>("op_10748"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_pad_type_0 = const()[name = tensor<string, []>("value_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_pad_0 = const()[name = tensor<string, []>("value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(164509312)))];
+            tensor<fp16, [768]> layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165689024)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_bias_to_fp16, dilations = var_10748, groups = var_10700, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_10746, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor<string, []>("value_cast_fp16")];
+            tensor<int32, [4]> var_10755_begin_0 = const()[name = tensor<string, []>("op_10755_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10755_end_0 = const()[name = tensor<string, []>("op_10755_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10755_end_mask_0 = const()[name = tensor<string, []>("op_10755_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10755_cast_fp16 = slice_by_index(begin = var_10755_begin_0, end = var_10755_end_0, end_mask = var_10755_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10755_cast_fp16")];
+            tensor<int32, [4]> var_10759_begin_0 = const()[name = tensor<string, []>("op_10759_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_10759_end_0 = const()[name = tensor<string, []>("op_10759_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_10759_end_mask_0 = const()[name = tensor<string, []>("op_10759_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10759_cast_fp16 = slice_by_index(begin = var_10759_begin_0, end = var_10759_end_0, end_mask = var_10759_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10759_cast_fp16")];
+            tensor<int32, [4]> var_10763_begin_0 = const()[name = tensor<string, []>("op_10763_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_10763_end_0 = const()[name = tensor<string, []>("op_10763_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_10763_end_mask_0 = const()[name = tensor<string, []>("op_10763_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10763_cast_fp16 = slice_by_index(begin = var_10763_begin_0, end = var_10763_end_0, end_mask = var_10763_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10763_cast_fp16")];
+            tensor<int32, [4]> var_10767_begin_0 = const()[name = tensor<string, []>("op_10767_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_10767_end_0 = const()[name = tensor<string, []>("op_10767_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_10767_end_mask_0 = const()[name = tensor<string, []>("op_10767_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10767_cast_fp16 = slice_by_index(begin = var_10767_begin_0, end = var_10767_end_0, end_mask = var_10767_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10767_cast_fp16")];
+            tensor<int32, [4]> var_10771_begin_0 = const()[name = tensor<string, []>("op_10771_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_10771_end_0 = const()[name = tensor<string, []>("op_10771_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_10771_end_mask_0 = const()[name = tensor<string, []>("op_10771_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10771_cast_fp16 = slice_by_index(begin = var_10771_begin_0, end = var_10771_end_0, end_mask = var_10771_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10771_cast_fp16")];
+            tensor<int32, [4]> var_10775_begin_0 = const()[name = tensor<string, []>("op_10775_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_10775_end_0 = const()[name = tensor<string, []>("op_10775_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_10775_end_mask_0 = const()[name = tensor<string, []>("op_10775_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10775_cast_fp16 = slice_by_index(begin = var_10775_begin_0, end = var_10775_end_0, end_mask = var_10775_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10775_cast_fp16")];
+            tensor<int32, [4]> var_10779_begin_0 = const()[name = tensor<string, []>("op_10779_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_10779_end_0 = const()[name = tensor<string, []>("op_10779_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_10779_end_mask_0 = const()[name = tensor<string, []>("op_10779_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10779_cast_fp16 = slice_by_index(begin = var_10779_begin_0, end = var_10779_end_0, end_mask = var_10779_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10779_cast_fp16")];
+            tensor<int32, [4]> var_10783_begin_0 = const()[name = tensor<string, []>("op_10783_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_10783_end_0 = const()[name = tensor<string, []>("op_10783_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_10783_end_mask_0 = const()[name = tensor<string, []>("op_10783_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10783_cast_fp16 = slice_by_index(begin = var_10783_begin_0, end = var_10783_end_0, end_mask = var_10783_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10783_cast_fp16")];
+            tensor<int32, [4]> var_10787_begin_0 = const()[name = tensor<string, []>("op_10787_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_10787_end_0 = const()[name = tensor<string, []>("op_10787_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_10787_end_mask_0 = const()[name = tensor<string, []>("op_10787_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10787_cast_fp16 = slice_by_index(begin = var_10787_begin_0, end = var_10787_end_0, end_mask = var_10787_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10787_cast_fp16")];
+            tensor<int32, [4]> var_10791_begin_0 = const()[name = tensor<string, []>("op_10791_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_10791_end_0 = const()[name = tensor<string, []>("op_10791_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_10791_end_mask_0 = const()[name = tensor<string, []>("op_10791_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10791_cast_fp16 = slice_by_index(begin = var_10791_begin_0, end = var_10791_end_0, end_mask = var_10791_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10791_cast_fp16")];
+            tensor<int32, [4]> var_10795_begin_0 = const()[name = tensor<string, []>("op_10795_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_10795_end_0 = const()[name = tensor<string, []>("op_10795_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_10795_end_mask_0 = const()[name = tensor<string, []>("op_10795_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10795_cast_fp16 = slice_by_index(begin = var_10795_begin_0, end = var_10795_end_0, end_mask = var_10795_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10795_cast_fp16")];
+            tensor<int32, [4]> var_10799_begin_0 = const()[name = tensor<string, []>("op_10799_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_10799_end_0 = const()[name = tensor<string, []>("op_10799_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_10799_end_mask_0 = const()[name = tensor<string, []>("op_10799_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_10799_cast_fp16 = slice_by_index(begin = var_10799_begin_0, end = var_10799_end_0, end_mask = var_10799_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_10799_cast_fp16")];
+            tensor<int32, [4]> var_10808_begin_0 = const()[name = tensor<string, []>("op_10808_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10808_end_0 = const()[name = tensor<string, []>("op_10808_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10808_end_mask_0 = const()[name = tensor<string, []>("op_10808_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10808_cast_fp16 = slice_by_index(begin = var_10808_begin_0, end = var_10808_end_0, end_mask = var_10808_end_mask_0, x = var_10755_cast_fp16)[name = tensor<string, []>("op_10808_cast_fp16")];
+            tensor<int32, [4]> var_10815_begin_0 = const()[name = tensor<string, []>("op_10815_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10815_end_0 = const()[name = tensor<string, []>("op_10815_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10815_end_mask_0 = const()[name = tensor<string, []>("op_10815_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10815_cast_fp16 = slice_by_index(begin = var_10815_begin_0, end = var_10815_end_0, end_mask = var_10815_end_mask_0, x = var_10755_cast_fp16)[name = tensor<string, []>("op_10815_cast_fp16")];
+            tensor<int32, [4]> var_10822_begin_0 = const()[name = tensor<string, []>("op_10822_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10822_end_0 = const()[name = tensor<string, []>("op_10822_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10822_end_mask_0 = const()[name = tensor<string, []>("op_10822_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10822_cast_fp16 = slice_by_index(begin = var_10822_begin_0, end = var_10822_end_0, end_mask = var_10822_end_mask_0, x = var_10755_cast_fp16)[name = tensor<string, []>("op_10822_cast_fp16")];
+            tensor<int32, [4]> var_10829_begin_0 = const()[name = tensor<string, []>("op_10829_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10829_end_0 = const()[name = tensor<string, []>("op_10829_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10829_end_mask_0 = const()[name = tensor<string, []>("op_10829_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10829_cast_fp16 = slice_by_index(begin = var_10829_begin_0, end = var_10829_end_0, end_mask = var_10829_end_mask_0, x = var_10755_cast_fp16)[name = tensor<string, []>("op_10829_cast_fp16")];
+            tensor<int32, [4]> var_10836_begin_0 = const()[name = tensor<string, []>("op_10836_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10836_end_0 = const()[name = tensor<string, []>("op_10836_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10836_end_mask_0 = const()[name = tensor<string, []>("op_10836_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10836_cast_fp16 = slice_by_index(begin = var_10836_begin_0, end = var_10836_end_0, end_mask = var_10836_end_mask_0, x = var_10759_cast_fp16)[name = tensor<string, []>("op_10836_cast_fp16")];
+            tensor<int32, [4]> var_10843_begin_0 = const()[name = tensor<string, []>("op_10843_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10843_end_0 = const()[name = tensor<string, []>("op_10843_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10843_end_mask_0 = const()[name = tensor<string, []>("op_10843_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10843_cast_fp16 = slice_by_index(begin = var_10843_begin_0, end = var_10843_end_0, end_mask = var_10843_end_mask_0, x = var_10759_cast_fp16)[name = tensor<string, []>("op_10843_cast_fp16")];
+            tensor<int32, [4]> var_10850_begin_0 = const()[name = tensor<string, []>("op_10850_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10850_end_0 = const()[name = tensor<string, []>("op_10850_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10850_end_mask_0 = const()[name = tensor<string, []>("op_10850_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10850_cast_fp16 = slice_by_index(begin = var_10850_begin_0, end = var_10850_end_0, end_mask = var_10850_end_mask_0, x = var_10759_cast_fp16)[name = tensor<string, []>("op_10850_cast_fp16")];
+            tensor<int32, [4]> var_10857_begin_0 = const()[name = tensor<string, []>("op_10857_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10857_end_0 = const()[name = tensor<string, []>("op_10857_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10857_end_mask_0 = const()[name = tensor<string, []>("op_10857_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10857_cast_fp16 = slice_by_index(begin = var_10857_begin_0, end = var_10857_end_0, end_mask = var_10857_end_mask_0, x = var_10759_cast_fp16)[name = tensor<string, []>("op_10857_cast_fp16")];
+            tensor<int32, [4]> var_10864_begin_0 = const()[name = tensor<string, []>("op_10864_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10864_end_0 = const()[name = tensor<string, []>("op_10864_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10864_end_mask_0 = const()[name = tensor<string, []>("op_10864_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10864_cast_fp16 = slice_by_index(begin = var_10864_begin_0, end = var_10864_end_0, end_mask = var_10864_end_mask_0, x = var_10763_cast_fp16)[name = tensor<string, []>("op_10864_cast_fp16")];
+            tensor<int32, [4]> var_10871_begin_0 = const()[name = tensor<string, []>("op_10871_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10871_end_0 = const()[name = tensor<string, []>("op_10871_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10871_end_mask_0 = const()[name = tensor<string, []>("op_10871_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10871_cast_fp16 = slice_by_index(begin = var_10871_begin_0, end = var_10871_end_0, end_mask = var_10871_end_mask_0, x = var_10763_cast_fp16)[name = tensor<string, []>("op_10871_cast_fp16")];
+            tensor<int32, [4]> var_10878_begin_0 = const()[name = tensor<string, []>("op_10878_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10878_end_0 = const()[name = tensor<string, []>("op_10878_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10878_end_mask_0 = const()[name = tensor<string, []>("op_10878_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10878_cast_fp16 = slice_by_index(begin = var_10878_begin_0, end = var_10878_end_0, end_mask = var_10878_end_mask_0, x = var_10763_cast_fp16)[name = tensor<string, []>("op_10878_cast_fp16")];
+            tensor<int32, [4]> var_10885_begin_0 = const()[name = tensor<string, []>("op_10885_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10885_end_0 = const()[name = tensor<string, []>("op_10885_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10885_end_mask_0 = const()[name = tensor<string, []>("op_10885_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10885_cast_fp16 = slice_by_index(begin = var_10885_begin_0, end = var_10885_end_0, end_mask = var_10885_end_mask_0, x = var_10763_cast_fp16)[name = tensor<string, []>("op_10885_cast_fp16")];
+            tensor<int32, [4]> var_10892_begin_0 = const()[name = tensor<string, []>("op_10892_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10892_end_0 = const()[name = tensor<string, []>("op_10892_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10892_end_mask_0 = const()[name = tensor<string, []>("op_10892_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10892_cast_fp16 = slice_by_index(begin = var_10892_begin_0, end = var_10892_end_0, end_mask = var_10892_end_mask_0, x = var_10767_cast_fp16)[name = tensor<string, []>("op_10892_cast_fp16")];
+            tensor<int32, [4]> var_10899_begin_0 = const()[name = tensor<string, []>("op_10899_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10899_end_0 = const()[name = tensor<string, []>("op_10899_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10899_end_mask_0 = const()[name = tensor<string, []>("op_10899_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10899_cast_fp16 = slice_by_index(begin = var_10899_begin_0, end = var_10899_end_0, end_mask = var_10899_end_mask_0, x = var_10767_cast_fp16)[name = tensor<string, []>("op_10899_cast_fp16")];
+            tensor<int32, [4]> var_10906_begin_0 = const()[name = tensor<string, []>("op_10906_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10906_end_0 = const()[name = tensor<string, []>("op_10906_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10906_end_mask_0 = const()[name = tensor<string, []>("op_10906_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10906_cast_fp16 = slice_by_index(begin = var_10906_begin_0, end = var_10906_end_0, end_mask = var_10906_end_mask_0, x = var_10767_cast_fp16)[name = tensor<string, []>("op_10906_cast_fp16")];
+            tensor<int32, [4]> var_10913_begin_0 = const()[name = tensor<string, []>("op_10913_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10913_end_0 = const()[name = tensor<string, []>("op_10913_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10913_end_mask_0 = const()[name = tensor<string, []>("op_10913_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10913_cast_fp16 = slice_by_index(begin = var_10913_begin_0, end = var_10913_end_0, end_mask = var_10913_end_mask_0, x = var_10767_cast_fp16)[name = tensor<string, []>("op_10913_cast_fp16")];
+            tensor<int32, [4]> var_10920_begin_0 = const()[name = tensor<string, []>("op_10920_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10920_end_0 = const()[name = tensor<string, []>("op_10920_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10920_end_mask_0 = const()[name = tensor<string, []>("op_10920_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10920_cast_fp16 = slice_by_index(begin = var_10920_begin_0, end = var_10920_end_0, end_mask = var_10920_end_mask_0, x = var_10771_cast_fp16)[name = tensor<string, []>("op_10920_cast_fp16")];
+            tensor<int32, [4]> var_10927_begin_0 = const()[name = tensor<string, []>("op_10927_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10927_end_0 = const()[name = tensor<string, []>("op_10927_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10927_end_mask_0 = const()[name = tensor<string, []>("op_10927_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10927_cast_fp16 = slice_by_index(begin = var_10927_begin_0, end = var_10927_end_0, end_mask = var_10927_end_mask_0, x = var_10771_cast_fp16)[name = tensor<string, []>("op_10927_cast_fp16")];
+            tensor<int32, [4]> var_10934_begin_0 = const()[name = tensor<string, []>("op_10934_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10934_end_0 = const()[name = tensor<string, []>("op_10934_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10934_end_mask_0 = const()[name = tensor<string, []>("op_10934_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10934_cast_fp16 = slice_by_index(begin = var_10934_begin_0, end = var_10934_end_0, end_mask = var_10934_end_mask_0, x = var_10771_cast_fp16)[name = tensor<string, []>("op_10934_cast_fp16")];
+            tensor<int32, [4]> var_10941_begin_0 = const()[name = tensor<string, []>("op_10941_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10941_end_0 = const()[name = tensor<string, []>("op_10941_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10941_end_mask_0 = const()[name = tensor<string, []>("op_10941_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10941_cast_fp16 = slice_by_index(begin = var_10941_begin_0, end = var_10941_end_0, end_mask = var_10941_end_mask_0, x = var_10771_cast_fp16)[name = tensor<string, []>("op_10941_cast_fp16")];
+            tensor<int32, [4]> var_10948_begin_0 = const()[name = tensor<string, []>("op_10948_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10948_end_0 = const()[name = tensor<string, []>("op_10948_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10948_end_mask_0 = const()[name = tensor<string, []>("op_10948_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10948_cast_fp16 = slice_by_index(begin = var_10948_begin_0, end = var_10948_end_0, end_mask = var_10948_end_mask_0, x = var_10775_cast_fp16)[name = tensor<string, []>("op_10948_cast_fp16")];
+            tensor<int32, [4]> var_10955_begin_0 = const()[name = tensor<string, []>("op_10955_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10955_end_0 = const()[name = tensor<string, []>("op_10955_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10955_end_mask_0 = const()[name = tensor<string, []>("op_10955_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10955_cast_fp16 = slice_by_index(begin = var_10955_begin_0, end = var_10955_end_0, end_mask = var_10955_end_mask_0, x = var_10775_cast_fp16)[name = tensor<string, []>("op_10955_cast_fp16")];
+            tensor<int32, [4]> var_10962_begin_0 = const()[name = tensor<string, []>("op_10962_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10962_end_0 = const()[name = tensor<string, []>("op_10962_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10962_end_mask_0 = const()[name = tensor<string, []>("op_10962_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10962_cast_fp16 = slice_by_index(begin = var_10962_begin_0, end = var_10962_end_0, end_mask = var_10962_end_mask_0, x = var_10775_cast_fp16)[name = tensor<string, []>("op_10962_cast_fp16")];
+            tensor<int32, [4]> var_10969_begin_0 = const()[name = tensor<string, []>("op_10969_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10969_end_0 = const()[name = tensor<string, []>("op_10969_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10969_end_mask_0 = const()[name = tensor<string, []>("op_10969_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10969_cast_fp16 = slice_by_index(begin = var_10969_begin_0, end = var_10969_end_0, end_mask = var_10969_end_mask_0, x = var_10775_cast_fp16)[name = tensor<string, []>("op_10969_cast_fp16")];
+            tensor<int32, [4]> var_10976_begin_0 = const()[name = tensor<string, []>("op_10976_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_10976_end_0 = const()[name = tensor<string, []>("op_10976_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_10976_end_mask_0 = const()[name = tensor<string, []>("op_10976_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10976_cast_fp16 = slice_by_index(begin = var_10976_begin_0, end = var_10976_end_0, end_mask = var_10976_end_mask_0, x = var_10779_cast_fp16)[name = tensor<string, []>("op_10976_cast_fp16")];
+            tensor<int32, [4]> var_10983_begin_0 = const()[name = tensor<string, []>("op_10983_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_10983_end_0 = const()[name = tensor<string, []>("op_10983_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_10983_end_mask_0 = const()[name = tensor<string, []>("op_10983_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10983_cast_fp16 = slice_by_index(begin = var_10983_begin_0, end = var_10983_end_0, end_mask = var_10983_end_mask_0, x = var_10779_cast_fp16)[name = tensor<string, []>("op_10983_cast_fp16")];
+            tensor<int32, [4]> var_10990_begin_0 = const()[name = tensor<string, []>("op_10990_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_10990_end_0 = const()[name = tensor<string, []>("op_10990_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_10990_end_mask_0 = const()[name = tensor<string, []>("op_10990_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10990_cast_fp16 = slice_by_index(begin = var_10990_begin_0, end = var_10990_end_0, end_mask = var_10990_end_mask_0, x = var_10779_cast_fp16)[name = tensor<string, []>("op_10990_cast_fp16")];
+            tensor<int32, [4]> var_10997_begin_0 = const()[name = tensor<string, []>("op_10997_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_10997_end_0 = const()[name = tensor<string, []>("op_10997_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_10997_end_mask_0 = const()[name = tensor<string, []>("op_10997_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_10997_cast_fp16 = slice_by_index(begin = var_10997_begin_0, end = var_10997_end_0, end_mask = var_10997_end_mask_0, x = var_10779_cast_fp16)[name = tensor<string, []>("op_10997_cast_fp16")];
+            tensor<int32, [4]> var_11004_begin_0 = const()[name = tensor<string, []>("op_11004_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_11004_end_0 = const()[name = tensor<string, []>("op_11004_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_11004_end_mask_0 = const()[name = tensor<string, []>("op_11004_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11004_cast_fp16 = slice_by_index(begin = var_11004_begin_0, end = var_11004_end_0, end_mask = var_11004_end_mask_0, x = var_10783_cast_fp16)[name = tensor<string, []>("op_11004_cast_fp16")];
+            tensor<int32, [4]> var_11011_begin_0 = const()[name = tensor<string, []>("op_11011_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_11011_end_0 = const()[name = tensor<string, []>("op_11011_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_11011_end_mask_0 = const()[name = tensor<string, []>("op_11011_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11011_cast_fp16 = slice_by_index(begin = var_11011_begin_0, end = var_11011_end_0, end_mask = var_11011_end_mask_0, x = var_10783_cast_fp16)[name = tensor<string, []>("op_11011_cast_fp16")];
+            tensor<int32, [4]> var_11018_begin_0 = const()[name = tensor<string, []>("op_11018_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_11018_end_0 = const()[name = tensor<string, []>("op_11018_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_11018_end_mask_0 = const()[name = tensor<string, []>("op_11018_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11018_cast_fp16 = slice_by_index(begin = var_11018_begin_0, end = var_11018_end_0, end_mask = var_11018_end_mask_0, x = var_10783_cast_fp16)[name = tensor<string, []>("op_11018_cast_fp16")];
+            tensor<int32, [4]> var_11025_begin_0 = const()[name = tensor<string, []>("op_11025_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_11025_end_0 = const()[name = tensor<string, []>("op_11025_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_11025_end_mask_0 = const()[name = tensor<string, []>("op_11025_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11025_cast_fp16 = slice_by_index(begin = var_11025_begin_0, end = var_11025_end_0, end_mask = var_11025_end_mask_0, x = var_10783_cast_fp16)[name = tensor<string, []>("op_11025_cast_fp16")];
+            tensor<int32, [4]> var_11032_begin_0 = const()[name = tensor<string, []>("op_11032_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_11032_end_0 = const()[name = tensor<string, []>("op_11032_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_11032_end_mask_0 = const()[name = tensor<string, []>("op_11032_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11032_cast_fp16 = slice_by_index(begin = var_11032_begin_0, end = var_11032_end_0, end_mask = var_11032_end_mask_0, x = var_10787_cast_fp16)[name = tensor<string, []>("op_11032_cast_fp16")];
+            tensor<int32, [4]> var_11039_begin_0 = const()[name = tensor<string, []>("op_11039_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_11039_end_0 = const()[name = tensor<string, []>("op_11039_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_11039_end_mask_0 = const()[name = tensor<string, []>("op_11039_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11039_cast_fp16 = slice_by_index(begin = var_11039_begin_0, end = var_11039_end_0, end_mask = var_11039_end_mask_0, x = var_10787_cast_fp16)[name = tensor<string, []>("op_11039_cast_fp16")];
+            tensor<int32, [4]> var_11046_begin_0 = const()[name = tensor<string, []>("op_11046_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_11046_end_0 = const()[name = tensor<string, []>("op_11046_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_11046_end_mask_0 = const()[name = tensor<string, []>("op_11046_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11046_cast_fp16 = slice_by_index(begin = var_11046_begin_0, end = var_11046_end_0, end_mask = var_11046_end_mask_0, x = var_10787_cast_fp16)[name = tensor<string, []>("op_11046_cast_fp16")];
+            tensor<int32, [4]> var_11053_begin_0 = const()[name = tensor<string, []>("op_11053_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_11053_end_0 = const()[name = tensor<string, []>("op_11053_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_11053_end_mask_0 = const()[name = tensor<string, []>("op_11053_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11053_cast_fp16 = slice_by_index(begin = var_11053_begin_0, end = var_11053_end_0, end_mask = var_11053_end_mask_0, x = var_10787_cast_fp16)[name = tensor<string, []>("op_11053_cast_fp16")];
+            tensor<int32, [4]> var_11060_begin_0 = const()[name = tensor<string, []>("op_11060_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_11060_end_0 = const()[name = tensor<string, []>("op_11060_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_11060_end_mask_0 = const()[name = tensor<string, []>("op_11060_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11060_cast_fp16 = slice_by_index(begin = var_11060_begin_0, end = var_11060_end_0, end_mask = var_11060_end_mask_0, x = var_10791_cast_fp16)[name = tensor<string, []>("op_11060_cast_fp16")];
+            tensor<int32, [4]> var_11067_begin_0 = const()[name = tensor<string, []>("op_11067_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_11067_end_0 = const()[name = tensor<string, []>("op_11067_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_11067_end_mask_0 = const()[name = tensor<string, []>("op_11067_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11067_cast_fp16 = slice_by_index(begin = var_11067_begin_0, end = var_11067_end_0, end_mask = var_11067_end_mask_0, x = var_10791_cast_fp16)[name = tensor<string, []>("op_11067_cast_fp16")];
+            tensor<int32, [4]> var_11074_begin_0 = const()[name = tensor<string, []>("op_11074_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_11074_end_0 = const()[name = tensor<string, []>("op_11074_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_11074_end_mask_0 = const()[name = tensor<string, []>("op_11074_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11074_cast_fp16 = slice_by_index(begin = var_11074_begin_0, end = var_11074_end_0, end_mask = var_11074_end_mask_0, x = var_10791_cast_fp16)[name = tensor<string, []>("op_11074_cast_fp16")];
+            tensor<int32, [4]> var_11081_begin_0 = const()[name = tensor<string, []>("op_11081_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_11081_end_0 = const()[name = tensor<string, []>("op_11081_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_11081_end_mask_0 = const()[name = tensor<string, []>("op_11081_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11081_cast_fp16 = slice_by_index(begin = var_11081_begin_0, end = var_11081_end_0, end_mask = var_11081_end_mask_0, x = var_10791_cast_fp16)[name = tensor<string, []>("op_11081_cast_fp16")];
+            tensor<int32, [4]> var_11088_begin_0 = const()[name = tensor<string, []>("op_11088_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_11088_end_0 = const()[name = tensor<string, []>("op_11088_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_11088_end_mask_0 = const()[name = tensor<string, []>("op_11088_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11088_cast_fp16 = slice_by_index(begin = var_11088_begin_0, end = var_11088_end_0, end_mask = var_11088_end_mask_0, x = var_10795_cast_fp16)[name = tensor<string, []>("op_11088_cast_fp16")];
+            tensor<int32, [4]> var_11095_begin_0 = const()[name = tensor<string, []>("op_11095_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_11095_end_0 = const()[name = tensor<string, []>("op_11095_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_11095_end_mask_0 = const()[name = tensor<string, []>("op_11095_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11095_cast_fp16 = slice_by_index(begin = var_11095_begin_0, end = var_11095_end_0, end_mask = var_11095_end_mask_0, x = var_10795_cast_fp16)[name = tensor<string, []>("op_11095_cast_fp16")];
+            tensor<int32, [4]> var_11102_begin_0 = const()[name = tensor<string, []>("op_11102_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_11102_end_0 = const()[name = tensor<string, []>("op_11102_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_11102_end_mask_0 = const()[name = tensor<string, []>("op_11102_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11102_cast_fp16 = slice_by_index(begin = var_11102_begin_0, end = var_11102_end_0, end_mask = var_11102_end_mask_0, x = var_10795_cast_fp16)[name = tensor<string, []>("op_11102_cast_fp16")];
+            tensor<int32, [4]> var_11109_begin_0 = const()[name = tensor<string, []>("op_11109_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_11109_end_0 = const()[name = tensor<string, []>("op_11109_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_11109_end_mask_0 = const()[name = tensor<string, []>("op_11109_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11109_cast_fp16 = slice_by_index(begin = var_11109_begin_0, end = var_11109_end_0, end_mask = var_11109_end_mask_0, x = var_10795_cast_fp16)[name = tensor<string, []>("op_11109_cast_fp16")];
+            tensor<int32, [4]> var_11116_begin_0 = const()[name = tensor<string, []>("op_11116_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_11116_end_0 = const()[name = tensor<string, []>("op_11116_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_11116_end_mask_0 = const()[name = tensor<string, []>("op_11116_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11116_cast_fp16 = slice_by_index(begin = var_11116_begin_0, end = var_11116_end_0, end_mask = var_11116_end_mask_0, x = var_10799_cast_fp16)[name = tensor<string, []>("op_11116_cast_fp16")];
+            tensor<int32, [4]> var_11123_begin_0 = const()[name = tensor<string, []>("op_11123_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_11123_end_0 = const()[name = tensor<string, []>("op_11123_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_11123_end_mask_0 = const()[name = tensor<string, []>("op_11123_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11123_cast_fp16 = slice_by_index(begin = var_11123_begin_0, end = var_11123_end_0, end_mask = var_11123_end_mask_0, x = var_10799_cast_fp16)[name = tensor<string, []>("op_11123_cast_fp16")];
+            tensor<int32, [4]> var_11130_begin_0 = const()[name = tensor<string, []>("op_11130_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_11130_end_0 = const()[name = tensor<string, []>("op_11130_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_11130_end_mask_0 = const()[name = tensor<string, []>("op_11130_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11130_cast_fp16 = slice_by_index(begin = var_11130_begin_0, end = var_11130_end_0, end_mask = var_11130_end_mask_0, x = var_10799_cast_fp16)[name = tensor<string, []>("op_11130_cast_fp16")];
+            tensor<int32, [4]> var_11137_begin_0 = const()[name = tensor<string, []>("op_11137_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_11137_end_0 = const()[name = tensor<string, []>("op_11137_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_11137_end_mask_0 = const()[name = tensor<string, []>("op_11137_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_11137_cast_fp16 = slice_by_index(begin = var_11137_begin_0, end = var_11137_end_0, end_mask = var_11137_end_mask_0, x = var_10799_cast_fp16)[name = tensor<string, []>("op_11137_cast_fp16")];
+            tensor<int32, [4]> k_perm_0 = const()[name = tensor<string, []>("k_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_11142_begin_0 = const()[name = tensor<string, []>("op_11142_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_11142_end_0 = const()[name = tensor<string, []>("op_11142_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_11142_end_mask_0 = const()[name = tensor<string, []>("op_11142_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 768]> transpose_0 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp16, [1, 1500, 1, 64]> var_11142_cast_fp16 = slice_by_index(begin = var_11142_begin_0, end = var_11142_end_0, end_mask = var_11142_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11142_cast_fp16")];
+            tensor<int32, [4]> var_11146_begin_0 = const()[name = tensor<string, []>("op_11146_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_11146_end_0 = const()[name = tensor<string, []>("op_11146_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_11146_end_mask_0 = const()[name = tensor<string, []>("op_11146_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11146_cast_fp16 = slice_by_index(begin = var_11146_begin_0, end = var_11146_end_0, end_mask = var_11146_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11146_cast_fp16")];
+            tensor<int32, [4]> var_11150_begin_0 = const()[name = tensor<string, []>("op_11150_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_11150_end_0 = const()[name = tensor<string, []>("op_11150_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_11150_end_mask_0 = const()[name = tensor<string, []>("op_11150_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11150_cast_fp16 = slice_by_index(begin = var_11150_begin_0, end = var_11150_end_0, end_mask = var_11150_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11150_cast_fp16")];
+            tensor<int32, [4]> var_11154_begin_0 = const()[name = tensor<string, []>("op_11154_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_11154_end_0 = const()[name = tensor<string, []>("op_11154_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_11154_end_mask_0 = const()[name = tensor<string, []>("op_11154_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11154_cast_fp16 = slice_by_index(begin = var_11154_begin_0, end = var_11154_end_0, end_mask = var_11154_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11154_cast_fp16")];
+            tensor<int32, [4]> var_11158_begin_0 = const()[name = tensor<string, []>("op_11158_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_11158_end_0 = const()[name = tensor<string, []>("op_11158_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_11158_end_mask_0 = const()[name = tensor<string, []>("op_11158_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11158_cast_fp16 = slice_by_index(begin = var_11158_begin_0, end = var_11158_end_0, end_mask = var_11158_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11158_cast_fp16")];
+            tensor<int32, [4]> var_11162_begin_0 = const()[name = tensor<string, []>("op_11162_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_11162_end_0 = const()[name = tensor<string, []>("op_11162_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_11162_end_mask_0 = const()[name = tensor<string, []>("op_11162_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11162_cast_fp16 = slice_by_index(begin = var_11162_begin_0, end = var_11162_end_0, end_mask = var_11162_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11162_cast_fp16")];
+            tensor<int32, [4]> var_11166_begin_0 = const()[name = tensor<string, []>("op_11166_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 384])];
+            tensor<int32, [4]> var_11166_end_0 = const()[name = tensor<string, []>("op_11166_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 448])];
+            tensor<bool, [4]> var_11166_end_mask_0 = const()[name = tensor<string, []>("op_11166_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11166_cast_fp16 = slice_by_index(begin = var_11166_begin_0, end = var_11166_end_0, end_mask = var_11166_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11166_cast_fp16")];
+            tensor<int32, [4]> var_11170_begin_0 = const()[name = tensor<string, []>("op_11170_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 448])];
+            tensor<int32, [4]> var_11170_end_0 = const()[name = tensor<string, []>("op_11170_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 512])];
+            tensor<bool, [4]> var_11170_end_mask_0 = const()[name = tensor<string, []>("op_11170_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11170_cast_fp16 = slice_by_index(begin = var_11170_begin_0, end = var_11170_end_0, end_mask = var_11170_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11170_cast_fp16")];
+            tensor<int32, [4]> var_11174_begin_0 = const()[name = tensor<string, []>("op_11174_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 512])];
+            tensor<int32, [4]> var_11174_end_0 = const()[name = tensor<string, []>("op_11174_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 576])];
+            tensor<bool, [4]> var_11174_end_mask_0 = const()[name = tensor<string, []>("op_11174_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11174_cast_fp16 = slice_by_index(begin = var_11174_begin_0, end = var_11174_end_0, end_mask = var_11174_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11174_cast_fp16")];
+            tensor<int32, [4]> var_11178_begin_0 = const()[name = tensor<string, []>("op_11178_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 576])];
+            tensor<int32, [4]> var_11178_end_0 = const()[name = tensor<string, []>("op_11178_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 640])];
+            tensor<bool, [4]> var_11178_end_mask_0 = const()[name = tensor<string, []>("op_11178_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11178_cast_fp16 = slice_by_index(begin = var_11178_begin_0, end = var_11178_end_0, end_mask = var_11178_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11178_cast_fp16")];
+            tensor<int32, [4]> var_11182_begin_0 = const()[name = tensor<string, []>("op_11182_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 640])];
+            tensor<int32, [4]> var_11182_end_0 = const()[name = tensor<string, []>("op_11182_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 704])];
+            tensor<bool, [4]> var_11182_end_mask_0 = const()[name = tensor<string, []>("op_11182_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11182_cast_fp16 = slice_by_index(begin = var_11182_begin_0, end = var_11182_end_0, end_mask = var_11182_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11182_cast_fp16")];
+            tensor<int32, [4]> var_11186_begin_0 = const()[name = tensor<string, []>("op_11186_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 704])];
+            tensor<int32, [4]> var_11186_end_0 = const()[name = tensor<string, []>("op_11186_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 768])];
+            tensor<bool, [4]> var_11186_end_mask_0 = const()[name = tensor<string, []>("op_11186_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_11186_cast_fp16 = slice_by_index(begin = var_11186_begin_0, end = var_11186_end_0, end_mask = var_11186_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_11186_cast_fp16")];
+            tensor<int32, [4]> var_11188_begin_0 = const()[name = tensor<string, []>("op_11188_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_11188_end_0 = const()[name = tensor<string, []>("op_11188_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_11188_end_mask_0 = const()[name = tensor<string, []>("op_11188_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11188_cast_fp16 = slice_by_index(begin = var_11188_begin_0, end = var_11188_end_0, end_mask = var_11188_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11188_cast_fp16")];
+            tensor<int32, [4]> var_11192_begin_0 = const()[name = tensor<string, []>("op_11192_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_11192_end_0 = const()[name = tensor<string, []>("op_11192_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_11192_end_mask_0 = const()[name = tensor<string, []>("op_11192_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11192_cast_fp16 = slice_by_index(begin = var_11192_begin_0, end = var_11192_end_0, end_mask = var_11192_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11192_cast_fp16")];
+            tensor<int32, [4]> var_11196_begin_0 = const()[name = tensor<string, []>("op_11196_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_11196_end_0 = const()[name = tensor<string, []>("op_11196_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_11196_end_mask_0 = const()[name = tensor<string, []>("op_11196_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11196_cast_fp16 = slice_by_index(begin = var_11196_begin_0, end = var_11196_end_0, end_mask = var_11196_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11196_cast_fp16")];
+            tensor<int32, [4]> var_11200_begin_0 = const()[name = tensor<string, []>("op_11200_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_11200_end_0 = const()[name = tensor<string, []>("op_11200_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_11200_end_mask_0 = const()[name = tensor<string, []>("op_11200_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11200_cast_fp16 = slice_by_index(begin = var_11200_begin_0, end = var_11200_end_0, end_mask = var_11200_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11200_cast_fp16")];
+            tensor<int32, [4]> var_11204_begin_0 = const()[name = tensor<string, []>("op_11204_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_11204_end_0 = const()[name = tensor<string, []>("op_11204_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_11204_end_mask_0 = const()[name = tensor<string, []>("op_11204_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11204_cast_fp16 = slice_by_index(begin = var_11204_begin_0, end = var_11204_end_0, end_mask = var_11204_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11204_cast_fp16")];
+            tensor<int32, [4]> var_11208_begin_0 = const()[name = tensor<string, []>("op_11208_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_11208_end_0 = const()[name = tensor<string, []>("op_11208_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_11208_end_mask_0 = const()[name = tensor<string, []>("op_11208_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11208_cast_fp16 = slice_by_index(begin = var_11208_begin_0, end = var_11208_end_0, end_mask = var_11208_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11208_cast_fp16")];
+            tensor<int32, [4]> var_11212_begin_0 = const()[name = tensor<string, []>("op_11212_begin_0"), val = tensor<int32, [4]>([0, 384, 0, 0])];
+            tensor<int32, [4]> var_11212_end_0 = const()[name = tensor<string, []>("op_11212_end_0"), val = tensor<int32, [4]>([1, 448, 1, 1500])];
+            tensor<bool, [4]> var_11212_end_mask_0 = const()[name = tensor<string, []>("op_11212_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11212_cast_fp16 = slice_by_index(begin = var_11212_begin_0, end = var_11212_end_0, end_mask = var_11212_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11212_cast_fp16")];
+            tensor<int32, [4]> var_11216_begin_0 = const()[name = tensor<string, []>("op_11216_begin_0"), val = tensor<int32, [4]>([0, 448, 0, 0])];
+            tensor<int32, [4]> var_11216_end_0 = const()[name = tensor<string, []>("op_11216_end_0"), val = tensor<int32, [4]>([1, 512, 1, 1500])];
+            tensor<bool, [4]> var_11216_end_mask_0 = const()[name = tensor<string, []>("op_11216_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11216_cast_fp16 = slice_by_index(begin = var_11216_begin_0, end = var_11216_end_0, end_mask = var_11216_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11216_cast_fp16")];
+            tensor<int32, [4]> var_11220_begin_0 = const()[name = tensor<string, []>("op_11220_begin_0"), val = tensor<int32, [4]>([0, 512, 0, 0])];
+            tensor<int32, [4]> var_11220_end_0 = const()[name = tensor<string, []>("op_11220_end_0"), val = tensor<int32, [4]>([1, 576, 1, 1500])];
+            tensor<bool, [4]> var_11220_end_mask_0 = const()[name = tensor<string, []>("op_11220_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11220_cast_fp16 = slice_by_index(begin = var_11220_begin_0, end = var_11220_end_0, end_mask = var_11220_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11220_cast_fp16")];
+            tensor<int32, [4]> var_11224_begin_0 = const()[name = tensor<string, []>("op_11224_begin_0"), val = tensor<int32, [4]>([0, 576, 0, 0])];
+            tensor<int32, [4]> var_11224_end_0 = const()[name = tensor<string, []>("op_11224_end_0"), val = tensor<int32, [4]>([1, 640, 1, 1500])];
+            tensor<bool, [4]> var_11224_end_mask_0 = const()[name = tensor<string, []>("op_11224_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11224_cast_fp16 = slice_by_index(begin = var_11224_begin_0, end = var_11224_end_0, end_mask = var_11224_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11224_cast_fp16")];
+            tensor<int32, [4]> var_11228_begin_0 = const()[name = tensor<string, []>("op_11228_begin_0"), val = tensor<int32, [4]>([0, 640, 0, 0])];
+            tensor<int32, [4]> var_11228_end_0 = const()[name = tensor<string, []>("op_11228_end_0"), val = tensor<int32, [4]>([1, 704, 1, 1500])];
+            tensor<bool, [4]> var_11228_end_mask_0 = const()[name = tensor<string, []>("op_11228_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11228_cast_fp16 = slice_by_index(begin = var_11228_begin_0, end = var_11228_end_0, end_mask = var_11228_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11228_cast_fp16")];
+            tensor<int32, [4]> var_11232_begin_0 = const()[name = tensor<string, []>("op_11232_begin_0"), val = tensor<int32, [4]>([0, 704, 0, 0])];
+            tensor<int32, [4]> var_11232_end_0 = const()[name = tensor<string, []>("op_11232_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1500])];
+            tensor<bool, [4]> var_11232_end_mask_0 = const()[name = tensor<string, []>("op_11232_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_11232_cast_fp16 = slice_by_index(begin = var_11232_begin_0, end = var_11232_end_0, end_mask = var_11232_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_11232_cast_fp16")];
+            tensor<string, []> var_11236_equation_0 = const()[name = tensor<string, []>("op_11236_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11236_cast_fp16 = einsum(equation = var_11236_equation_0, values = (var_11142_cast_fp16, var_10808_cast_fp16))[name = tensor<string, []>("op_11236_cast_fp16")];
+            tensor<fp16, []> var_11237_to_fp16 = const()[name = tensor<string, []>("op_11237_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1057_cast_fp16 = mul(x = var_11236_cast_fp16, y = var_11237_to_fp16)[name = tensor<string, []>("aw_chunk_1057_cast_fp16")];
+            tensor<string, []> var_11240_equation_0 = const()[name = tensor<string, []>("op_11240_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11240_cast_fp16 = einsum(equation = var_11240_equation_0, values = (var_11142_cast_fp16, var_10815_cast_fp16))[name = tensor<string, []>("op_11240_cast_fp16")];
+            tensor<fp16, []> var_11241_to_fp16 = const()[name = tensor<string, []>("op_11241_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1059_cast_fp16 = mul(x = var_11240_cast_fp16, y = var_11241_to_fp16)[name = tensor<string, []>("aw_chunk_1059_cast_fp16")];
+            tensor<string, []> var_11244_equation_0 = const()[name = tensor<string, []>("op_11244_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11244_cast_fp16 = einsum(equation = var_11244_equation_0, values = (var_11142_cast_fp16, var_10822_cast_fp16))[name = tensor<string, []>("op_11244_cast_fp16")];
+            tensor<fp16, []> var_11245_to_fp16 = const()[name = tensor<string, []>("op_11245_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1061_cast_fp16 = mul(x = var_11244_cast_fp16, y = var_11245_to_fp16)[name = tensor<string, []>("aw_chunk_1061_cast_fp16")];
+            tensor<string, []> var_11248_equation_0 = const()[name = tensor<string, []>("op_11248_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11248_cast_fp16 = einsum(equation = var_11248_equation_0, values = (var_11142_cast_fp16, var_10829_cast_fp16))[name = tensor<string, []>("op_11248_cast_fp16")];
+            tensor<fp16, []> var_11249_to_fp16 = const()[name = tensor<string, []>("op_11249_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1063_cast_fp16 = mul(x = var_11248_cast_fp16, y = var_11249_to_fp16)[name = tensor<string, []>("aw_chunk_1063_cast_fp16")];
+            tensor<string, []> var_11252_equation_0 = const()[name = tensor<string, []>("op_11252_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11252_cast_fp16 = einsum(equation = var_11252_equation_0, values = (var_11146_cast_fp16, var_10836_cast_fp16))[name = tensor<string, []>("op_11252_cast_fp16")];
+            tensor<fp16, []> var_11253_to_fp16 = const()[name = tensor<string, []>("op_11253_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1065_cast_fp16 = mul(x = var_11252_cast_fp16, y = var_11253_to_fp16)[name = tensor<string, []>("aw_chunk_1065_cast_fp16")];
+            tensor<string, []> var_11256_equation_0 = const()[name = tensor<string, []>("op_11256_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11256_cast_fp16 = einsum(equation = var_11256_equation_0, values = (var_11146_cast_fp16, var_10843_cast_fp16))[name = tensor<string, []>("op_11256_cast_fp16")];
+            tensor<fp16, []> var_11257_to_fp16 = const()[name = tensor<string, []>("op_11257_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1067_cast_fp16 = mul(x = var_11256_cast_fp16, y = var_11257_to_fp16)[name = tensor<string, []>("aw_chunk_1067_cast_fp16")];
+            tensor<string, []> var_11260_equation_0 = const()[name = tensor<string, []>("op_11260_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11260_cast_fp16 = einsum(equation = var_11260_equation_0, values = (var_11146_cast_fp16, var_10850_cast_fp16))[name = tensor<string, []>("op_11260_cast_fp16")];
+            tensor<fp16, []> var_11261_to_fp16 = const()[name = tensor<string, []>("op_11261_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1069_cast_fp16 = mul(x = var_11260_cast_fp16, y = var_11261_to_fp16)[name = tensor<string, []>("aw_chunk_1069_cast_fp16")];
+            tensor<string, []> var_11264_equation_0 = const()[name = tensor<string, []>("op_11264_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11264_cast_fp16 = einsum(equation = var_11264_equation_0, values = (var_11146_cast_fp16, var_10857_cast_fp16))[name = tensor<string, []>("op_11264_cast_fp16")];
+            tensor<fp16, []> var_11265_to_fp16 = const()[name = tensor<string, []>("op_11265_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1071_cast_fp16 = mul(x = var_11264_cast_fp16, y = var_11265_to_fp16)[name = tensor<string, []>("aw_chunk_1071_cast_fp16")];
+            tensor<string, []> var_11268_equation_0 = const()[name = tensor<string, []>("op_11268_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11268_cast_fp16 = einsum(equation = var_11268_equation_0, values = (var_11150_cast_fp16, var_10864_cast_fp16))[name = tensor<string, []>("op_11268_cast_fp16")];
+            tensor<fp16, []> var_11269_to_fp16 = const()[name = tensor<string, []>("op_11269_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1073_cast_fp16 = mul(x = var_11268_cast_fp16, y = var_11269_to_fp16)[name = tensor<string, []>("aw_chunk_1073_cast_fp16")];
+            tensor<string, []> var_11272_equation_0 = const()[name = tensor<string, []>("op_11272_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11272_cast_fp16 = einsum(equation = var_11272_equation_0, values = (var_11150_cast_fp16, var_10871_cast_fp16))[name = tensor<string, []>("op_11272_cast_fp16")];
+            tensor<fp16, []> var_11273_to_fp16 = const()[name = tensor<string, []>("op_11273_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1075_cast_fp16 = mul(x = var_11272_cast_fp16, y = var_11273_to_fp16)[name = tensor<string, []>("aw_chunk_1075_cast_fp16")];
+            tensor<string, []> var_11276_equation_0 = const()[name = tensor<string, []>("op_11276_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11276_cast_fp16 = einsum(equation = var_11276_equation_0, values = (var_11150_cast_fp16, var_10878_cast_fp16))[name = tensor<string, []>("op_11276_cast_fp16")];
+            tensor<fp16, []> var_11277_to_fp16 = const()[name = tensor<string, []>("op_11277_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1077_cast_fp16 = mul(x = var_11276_cast_fp16, y = var_11277_to_fp16)[name = tensor<string, []>("aw_chunk_1077_cast_fp16")];
+            tensor<string, []> var_11280_equation_0 = const()[name = tensor<string, []>("op_11280_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11280_cast_fp16 = einsum(equation = var_11280_equation_0, values = (var_11150_cast_fp16, var_10885_cast_fp16))[name = tensor<string, []>("op_11280_cast_fp16")];
+            tensor<fp16, []> var_11281_to_fp16 = const()[name = tensor<string, []>("op_11281_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1079_cast_fp16 = mul(x = var_11280_cast_fp16, y = var_11281_to_fp16)[name = tensor<string, []>("aw_chunk_1079_cast_fp16")];
+            tensor<string, []> var_11284_equation_0 = const()[name = tensor<string, []>("op_11284_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11284_cast_fp16 = einsum(equation = var_11284_equation_0, values = (var_11154_cast_fp16, var_10892_cast_fp16))[name = tensor<string, []>("op_11284_cast_fp16")];
+            tensor<fp16, []> var_11285_to_fp16 = const()[name = tensor<string, []>("op_11285_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1081_cast_fp16 = mul(x = var_11284_cast_fp16, y = var_11285_to_fp16)[name = tensor<string, []>("aw_chunk_1081_cast_fp16")];
+            tensor<string, []> var_11288_equation_0 = const()[name = tensor<string, []>("op_11288_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11288_cast_fp16 = einsum(equation = var_11288_equation_0, values = (var_11154_cast_fp16, var_10899_cast_fp16))[name = tensor<string, []>("op_11288_cast_fp16")];
+            tensor<fp16, []> var_11289_to_fp16 = const()[name = tensor<string, []>("op_11289_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1083_cast_fp16 = mul(x = var_11288_cast_fp16, y = var_11289_to_fp16)[name = tensor<string, []>("aw_chunk_1083_cast_fp16")];
+            tensor<string, []> var_11292_equation_0 = const()[name = tensor<string, []>("op_11292_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11292_cast_fp16 = einsum(equation = var_11292_equation_0, values = (var_11154_cast_fp16, var_10906_cast_fp16))[name = tensor<string, []>("op_11292_cast_fp16")];
+            tensor<fp16, []> var_11293_to_fp16 = const()[name = tensor<string, []>("op_11293_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1085_cast_fp16 = mul(x = var_11292_cast_fp16, y = var_11293_to_fp16)[name = tensor<string, []>("aw_chunk_1085_cast_fp16")];
+            tensor<string, []> var_11296_equation_0 = const()[name = tensor<string, []>("op_11296_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11296_cast_fp16 = einsum(equation = var_11296_equation_0, values = (var_11154_cast_fp16, var_10913_cast_fp16))[name = tensor<string, []>("op_11296_cast_fp16")];
+            tensor<fp16, []> var_11297_to_fp16 = const()[name = tensor<string, []>("op_11297_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1087_cast_fp16 = mul(x = var_11296_cast_fp16, y = var_11297_to_fp16)[name = tensor<string, []>("aw_chunk_1087_cast_fp16")];
+            tensor<string, []> var_11300_equation_0 = const()[name = tensor<string, []>("op_11300_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11300_cast_fp16 = einsum(equation = var_11300_equation_0, values = (var_11158_cast_fp16, var_10920_cast_fp16))[name = tensor<string, []>("op_11300_cast_fp16")];
+            tensor<fp16, []> var_11301_to_fp16 = const()[name = tensor<string, []>("op_11301_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1089_cast_fp16 = mul(x = var_11300_cast_fp16, y = var_11301_to_fp16)[name = tensor<string, []>("aw_chunk_1089_cast_fp16")];
+            tensor<string, []> var_11304_equation_0 = const()[name = tensor<string, []>("op_11304_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11304_cast_fp16 = einsum(equation = var_11304_equation_0, values = (var_11158_cast_fp16, var_10927_cast_fp16))[name = tensor<string, []>("op_11304_cast_fp16")];
+            tensor<fp16, []> var_11305_to_fp16 = const()[name = tensor<string, []>("op_11305_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1091_cast_fp16 = mul(x = var_11304_cast_fp16, y = var_11305_to_fp16)[name = tensor<string, []>("aw_chunk_1091_cast_fp16")];
+            tensor<string, []> var_11308_equation_0 = const()[name = tensor<string, []>("op_11308_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11308_cast_fp16 = einsum(equation = var_11308_equation_0, values = (var_11158_cast_fp16, var_10934_cast_fp16))[name = tensor<string, []>("op_11308_cast_fp16")];
+            tensor<fp16, []> var_11309_to_fp16 = const()[name = tensor<string, []>("op_11309_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1093_cast_fp16 = mul(x = var_11308_cast_fp16, y = var_11309_to_fp16)[name = tensor<string, []>("aw_chunk_1093_cast_fp16")];
+            tensor<string, []> var_11312_equation_0 = const()[name = tensor<string, []>("op_11312_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11312_cast_fp16 = einsum(equation = var_11312_equation_0, values = (var_11158_cast_fp16, var_10941_cast_fp16))[name = tensor<string, []>("op_11312_cast_fp16")];
+            tensor<fp16, []> var_11313_to_fp16 = const()[name = tensor<string, []>("op_11313_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1095_cast_fp16 = mul(x = var_11312_cast_fp16, y = var_11313_to_fp16)[name = tensor<string, []>("aw_chunk_1095_cast_fp16")];
+            tensor<string, []> var_11316_equation_0 = const()[name = tensor<string, []>("op_11316_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11316_cast_fp16 = einsum(equation = var_11316_equation_0, values = (var_11162_cast_fp16, var_10948_cast_fp16))[name = tensor<string, []>("op_11316_cast_fp16")];
+            tensor<fp16, []> var_11317_to_fp16 = const()[name = tensor<string, []>("op_11317_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1097_cast_fp16 = mul(x = var_11316_cast_fp16, y = var_11317_to_fp16)[name = tensor<string, []>("aw_chunk_1097_cast_fp16")];
+            tensor<string, []> var_11320_equation_0 = const()[name = tensor<string, []>("op_11320_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11320_cast_fp16 = einsum(equation = var_11320_equation_0, values = (var_11162_cast_fp16, var_10955_cast_fp16))[name = tensor<string, []>("op_11320_cast_fp16")];
+            tensor<fp16, []> var_11321_to_fp16 = const()[name = tensor<string, []>("op_11321_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1099_cast_fp16 = mul(x = var_11320_cast_fp16, y = var_11321_to_fp16)[name = tensor<string, []>("aw_chunk_1099_cast_fp16")];
+            tensor<string, []> var_11324_equation_0 = const()[name = tensor<string, []>("op_11324_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11324_cast_fp16 = einsum(equation = var_11324_equation_0, values = (var_11162_cast_fp16, var_10962_cast_fp16))[name = tensor<string, []>("op_11324_cast_fp16")];
+            tensor<fp16, []> var_11325_to_fp16 = const()[name = tensor<string, []>("op_11325_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1101_cast_fp16 = mul(x = var_11324_cast_fp16, y = var_11325_to_fp16)[name = tensor<string, []>("aw_chunk_1101_cast_fp16")];
+            tensor<string, []> var_11328_equation_0 = const()[name = tensor<string, []>("op_11328_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11328_cast_fp16 = einsum(equation = var_11328_equation_0, values = (var_11162_cast_fp16, var_10969_cast_fp16))[name = tensor<string, []>("op_11328_cast_fp16")];
+            tensor<fp16, []> var_11329_to_fp16 = const()[name = tensor<string, []>("op_11329_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1103_cast_fp16 = mul(x = var_11328_cast_fp16, y = var_11329_to_fp16)[name = tensor<string, []>("aw_chunk_1103_cast_fp16")];
+            tensor<string, []> var_11332_equation_0 = const()[name = tensor<string, []>("op_11332_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11332_cast_fp16 = einsum(equation = var_11332_equation_0, values = (var_11166_cast_fp16, var_10976_cast_fp16))[name = tensor<string, []>("op_11332_cast_fp16")];
+            tensor<fp16, []> var_11333_to_fp16 = const()[name = tensor<string, []>("op_11333_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1105_cast_fp16 = mul(x = var_11332_cast_fp16, y = var_11333_to_fp16)[name = tensor<string, []>("aw_chunk_1105_cast_fp16")];
+            tensor<string, []> var_11336_equation_0 = const()[name = tensor<string, []>("op_11336_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11336_cast_fp16 = einsum(equation = var_11336_equation_0, values = (var_11166_cast_fp16, var_10983_cast_fp16))[name = tensor<string, []>("op_11336_cast_fp16")];
+            tensor<fp16, []> var_11337_to_fp16 = const()[name = tensor<string, []>("op_11337_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1107_cast_fp16 = mul(x = var_11336_cast_fp16, y = var_11337_to_fp16)[name = tensor<string, []>("aw_chunk_1107_cast_fp16")];
+            tensor<string, []> var_11340_equation_0 = const()[name = tensor<string, []>("op_11340_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11340_cast_fp16 = einsum(equation = var_11340_equation_0, values = (var_11166_cast_fp16, var_10990_cast_fp16))[name = tensor<string, []>("op_11340_cast_fp16")];
+            tensor<fp16, []> var_11341_to_fp16 = const()[name = tensor<string, []>("op_11341_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1109_cast_fp16 = mul(x = var_11340_cast_fp16, y = var_11341_to_fp16)[name = tensor<string, []>("aw_chunk_1109_cast_fp16")];
+            tensor<string, []> var_11344_equation_0 = const()[name = tensor<string, []>("op_11344_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11344_cast_fp16 = einsum(equation = var_11344_equation_0, values = (var_11166_cast_fp16, var_10997_cast_fp16))[name = tensor<string, []>("op_11344_cast_fp16")];
+            tensor<fp16, []> var_11345_to_fp16 = const()[name = tensor<string, []>("op_11345_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1111_cast_fp16 = mul(x = var_11344_cast_fp16, y = var_11345_to_fp16)[name = tensor<string, []>("aw_chunk_1111_cast_fp16")];
+            tensor<string, []> var_11348_equation_0 = const()[name = tensor<string, []>("op_11348_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11348_cast_fp16 = einsum(equation = var_11348_equation_0, values = (var_11170_cast_fp16, var_11004_cast_fp16))[name = tensor<string, []>("op_11348_cast_fp16")];
+            tensor<fp16, []> var_11349_to_fp16 = const()[name = tensor<string, []>("op_11349_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1113_cast_fp16 = mul(x = var_11348_cast_fp16, y = var_11349_to_fp16)[name = tensor<string, []>("aw_chunk_1113_cast_fp16")];
+            tensor<string, []> var_11352_equation_0 = const()[name = tensor<string, []>("op_11352_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11352_cast_fp16 = einsum(equation = var_11352_equation_0, values = (var_11170_cast_fp16, var_11011_cast_fp16))[name = tensor<string, []>("op_11352_cast_fp16")];
+            tensor<fp16, []> var_11353_to_fp16 = const()[name = tensor<string, []>("op_11353_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1115_cast_fp16 = mul(x = var_11352_cast_fp16, y = var_11353_to_fp16)[name = tensor<string, []>("aw_chunk_1115_cast_fp16")];
+            tensor<string, []> var_11356_equation_0 = const()[name = tensor<string, []>("op_11356_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11356_cast_fp16 = einsum(equation = var_11356_equation_0, values = (var_11170_cast_fp16, var_11018_cast_fp16))[name = tensor<string, []>("op_11356_cast_fp16")];
+            tensor<fp16, []> var_11357_to_fp16 = const()[name = tensor<string, []>("op_11357_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1117_cast_fp16 = mul(x = var_11356_cast_fp16, y = var_11357_to_fp16)[name = tensor<string, []>("aw_chunk_1117_cast_fp16")];
+            tensor<string, []> var_11360_equation_0 = const()[name = tensor<string, []>("op_11360_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11360_cast_fp16 = einsum(equation = var_11360_equation_0, values = (var_11170_cast_fp16, var_11025_cast_fp16))[name = tensor<string, []>("op_11360_cast_fp16")];
+            tensor<fp16, []> var_11361_to_fp16 = const()[name = tensor<string, []>("op_11361_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1119_cast_fp16 = mul(x = var_11360_cast_fp16, y = var_11361_to_fp16)[name = tensor<string, []>("aw_chunk_1119_cast_fp16")];
+            tensor<string, []> var_11364_equation_0 = const()[name = tensor<string, []>("op_11364_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11364_cast_fp16 = einsum(equation = var_11364_equation_0, values = (var_11174_cast_fp16, var_11032_cast_fp16))[name = tensor<string, []>("op_11364_cast_fp16")];
+            tensor<fp16, []> var_11365_to_fp16 = const()[name = tensor<string, []>("op_11365_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1121_cast_fp16 = mul(x = var_11364_cast_fp16, y = var_11365_to_fp16)[name = tensor<string, []>("aw_chunk_1121_cast_fp16")];
+            tensor<string, []> var_11368_equation_0 = const()[name = tensor<string, []>("op_11368_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11368_cast_fp16 = einsum(equation = var_11368_equation_0, values = (var_11174_cast_fp16, var_11039_cast_fp16))[name = tensor<string, []>("op_11368_cast_fp16")];
+            tensor<fp16, []> var_11369_to_fp16 = const()[name = tensor<string, []>("op_11369_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1123_cast_fp16 = mul(x = var_11368_cast_fp16, y = var_11369_to_fp16)[name = tensor<string, []>("aw_chunk_1123_cast_fp16")];
+            tensor<string, []> var_11372_equation_0 = const()[name = tensor<string, []>("op_11372_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11372_cast_fp16 = einsum(equation = var_11372_equation_0, values = (var_11174_cast_fp16, var_11046_cast_fp16))[name = tensor<string, []>("op_11372_cast_fp16")];
+            tensor<fp16, []> var_11373_to_fp16 = const()[name = tensor<string, []>("op_11373_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1125_cast_fp16 = mul(x = var_11372_cast_fp16, y = var_11373_to_fp16)[name = tensor<string, []>("aw_chunk_1125_cast_fp16")];
+            tensor<string, []> var_11376_equation_0 = const()[name = tensor<string, []>("op_11376_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11376_cast_fp16 = einsum(equation = var_11376_equation_0, values = (var_11174_cast_fp16, var_11053_cast_fp16))[name = tensor<string, []>("op_11376_cast_fp16")];
+            tensor<fp16, []> var_11377_to_fp16 = const()[name = tensor<string, []>("op_11377_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1127_cast_fp16 = mul(x = var_11376_cast_fp16, y = var_11377_to_fp16)[name = tensor<string, []>("aw_chunk_1127_cast_fp16")];
+            tensor<string, []> var_11380_equation_0 = const()[name = tensor<string, []>("op_11380_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11380_cast_fp16 = einsum(equation = var_11380_equation_0, values = (var_11178_cast_fp16, var_11060_cast_fp16))[name = tensor<string, []>("op_11380_cast_fp16")];
+            tensor<fp16, []> var_11381_to_fp16 = const()[name = tensor<string, []>("op_11381_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1129_cast_fp16 = mul(x = var_11380_cast_fp16, y = var_11381_to_fp16)[name = tensor<string, []>("aw_chunk_1129_cast_fp16")];
+            tensor<string, []> var_11384_equation_0 = const()[name = tensor<string, []>("op_11384_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11384_cast_fp16 = einsum(equation = var_11384_equation_0, values = (var_11178_cast_fp16, var_11067_cast_fp16))[name = tensor<string, []>("op_11384_cast_fp16")];
+            tensor<fp16, []> var_11385_to_fp16 = const()[name = tensor<string, []>("op_11385_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1131_cast_fp16 = mul(x = var_11384_cast_fp16, y = var_11385_to_fp16)[name = tensor<string, []>("aw_chunk_1131_cast_fp16")];
+            tensor<string, []> var_11388_equation_0 = const()[name = tensor<string, []>("op_11388_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11388_cast_fp16 = einsum(equation = var_11388_equation_0, values = (var_11178_cast_fp16, var_11074_cast_fp16))[name = tensor<string, []>("op_11388_cast_fp16")];
+            tensor<fp16, []> var_11389_to_fp16 = const()[name = tensor<string, []>("op_11389_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1133_cast_fp16 = mul(x = var_11388_cast_fp16, y = var_11389_to_fp16)[name = tensor<string, []>("aw_chunk_1133_cast_fp16")];
+            tensor<string, []> var_11392_equation_0 = const()[name = tensor<string, []>("op_11392_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11392_cast_fp16 = einsum(equation = var_11392_equation_0, values = (var_11178_cast_fp16, var_11081_cast_fp16))[name = tensor<string, []>("op_11392_cast_fp16")];
+            tensor<fp16, []> var_11393_to_fp16 = const()[name = tensor<string, []>("op_11393_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1135_cast_fp16 = mul(x = var_11392_cast_fp16, y = var_11393_to_fp16)[name = tensor<string, []>("aw_chunk_1135_cast_fp16")];
+            tensor<string, []> var_11396_equation_0 = const()[name = tensor<string, []>("op_11396_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11396_cast_fp16 = einsum(equation = var_11396_equation_0, values = (var_11182_cast_fp16, var_11088_cast_fp16))[name = tensor<string, []>("op_11396_cast_fp16")];
+            tensor<fp16, []> var_11397_to_fp16 = const()[name = tensor<string, []>("op_11397_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1137_cast_fp16 = mul(x = var_11396_cast_fp16, y = var_11397_to_fp16)[name = tensor<string, []>("aw_chunk_1137_cast_fp16")];
+            tensor<string, []> var_11400_equation_0 = const()[name = tensor<string, []>("op_11400_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11400_cast_fp16 = einsum(equation = var_11400_equation_0, values = (var_11182_cast_fp16, var_11095_cast_fp16))[name = tensor<string, []>("op_11400_cast_fp16")];
+            tensor<fp16, []> var_11401_to_fp16 = const()[name = tensor<string, []>("op_11401_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1139_cast_fp16 = mul(x = var_11400_cast_fp16, y = var_11401_to_fp16)[name = tensor<string, []>("aw_chunk_1139_cast_fp16")];
+            tensor<string, []> var_11404_equation_0 = const()[name = tensor<string, []>("op_11404_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11404_cast_fp16 = einsum(equation = var_11404_equation_0, values = (var_11182_cast_fp16, var_11102_cast_fp16))[name = tensor<string, []>("op_11404_cast_fp16")];
+            tensor<fp16, []> var_11405_to_fp16 = const()[name = tensor<string, []>("op_11405_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1141_cast_fp16 = mul(x = var_11404_cast_fp16, y = var_11405_to_fp16)[name = tensor<string, []>("aw_chunk_1141_cast_fp16")];
+            tensor<string, []> var_11408_equation_0 = const()[name = tensor<string, []>("op_11408_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11408_cast_fp16 = einsum(equation = var_11408_equation_0, values = (var_11182_cast_fp16, var_11109_cast_fp16))[name = tensor<string, []>("op_11408_cast_fp16")];
+            tensor<fp16, []> var_11409_to_fp16 = const()[name = tensor<string, []>("op_11409_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1143_cast_fp16 = mul(x = var_11408_cast_fp16, y = var_11409_to_fp16)[name = tensor<string, []>("aw_chunk_1143_cast_fp16")];
+            tensor<string, []> var_11412_equation_0 = const()[name = tensor<string, []>("op_11412_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11412_cast_fp16 = einsum(equation = var_11412_equation_0, values = (var_11186_cast_fp16, var_11116_cast_fp16))[name = tensor<string, []>("op_11412_cast_fp16")];
+            tensor<fp16, []> var_11413_to_fp16 = const()[name = tensor<string, []>("op_11413_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1145_cast_fp16 = mul(x = var_11412_cast_fp16, y = var_11413_to_fp16)[name = tensor<string, []>("aw_chunk_1145_cast_fp16")];
+            tensor<string, []> var_11416_equation_0 = const()[name = tensor<string, []>("op_11416_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11416_cast_fp16 = einsum(equation = var_11416_equation_0, values = (var_11186_cast_fp16, var_11123_cast_fp16))[name = tensor<string, []>("op_11416_cast_fp16")];
+            tensor<fp16, []> var_11417_to_fp16 = const()[name = tensor<string, []>("op_11417_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1147_cast_fp16 = mul(x = var_11416_cast_fp16, y = var_11417_to_fp16)[name = tensor<string, []>("aw_chunk_1147_cast_fp16")];
+            tensor<string, []> var_11420_equation_0 = const()[name = tensor<string, []>("op_11420_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11420_cast_fp16 = einsum(equation = var_11420_equation_0, values = (var_11186_cast_fp16, var_11130_cast_fp16))[name = tensor<string, []>("op_11420_cast_fp16")];
+            tensor<fp16, []> var_11421_to_fp16 = const()[name = tensor<string, []>("op_11421_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1149_cast_fp16 = mul(x = var_11420_cast_fp16, y = var_11421_to_fp16)[name = tensor<string, []>("aw_chunk_1149_cast_fp16")];
+            tensor<string, []> var_11424_equation_0 = const()[name = tensor<string, []>("op_11424_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11424_cast_fp16 = einsum(equation = var_11424_equation_0, values = (var_11186_cast_fp16, var_11137_cast_fp16))[name = tensor<string, []>("op_11424_cast_fp16")];
+            tensor<fp16, []> var_11425_to_fp16 = const()[name = tensor<string, []>("op_11425_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_cast_fp16 = mul(x = var_11424_cast_fp16, y = var_11425_to_fp16)[name = tensor<string, []>("aw_chunk_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11427_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1057_cast_fp16)[name = tensor<string, []>("op_11427_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11428_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1059_cast_fp16)[name = tensor<string, []>("op_11428_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11429_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1061_cast_fp16)[name = tensor<string, []>("op_11429_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11430_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1063_cast_fp16)[name = tensor<string, []>("op_11430_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11431_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1065_cast_fp16)[name = tensor<string, []>("op_11431_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11432_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1067_cast_fp16)[name = tensor<string, []>("op_11432_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11433_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1069_cast_fp16)[name = tensor<string, []>("op_11433_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11434_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1071_cast_fp16)[name = tensor<string, []>("op_11434_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11435_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1073_cast_fp16)[name = tensor<string, []>("op_11435_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11436_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1075_cast_fp16)[name = tensor<string, []>("op_11436_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11437_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1077_cast_fp16)[name = tensor<string, []>("op_11437_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11438_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1079_cast_fp16)[name = tensor<string, []>("op_11438_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11439_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1081_cast_fp16)[name = tensor<string, []>("op_11439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11440_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1083_cast_fp16)[name = tensor<string, []>("op_11440_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11441_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1085_cast_fp16)[name = tensor<string, []>("op_11441_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11442_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1087_cast_fp16)[name = tensor<string, []>("op_11442_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11443_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1089_cast_fp16)[name = tensor<string, []>("op_11443_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11444_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1091_cast_fp16)[name = tensor<string, []>("op_11444_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11445_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1093_cast_fp16)[name = tensor<string, []>("op_11445_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11446_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1095_cast_fp16)[name = tensor<string, []>("op_11446_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11447_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1097_cast_fp16)[name = tensor<string, []>("op_11447_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11448_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1099_cast_fp16)[name = tensor<string, []>("op_11448_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11449_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1101_cast_fp16)[name = tensor<string, []>("op_11449_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11450_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1103_cast_fp16)[name = tensor<string, []>("op_11450_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11451_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1105_cast_fp16)[name = tensor<string, []>("op_11451_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11452_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1107_cast_fp16)[name = tensor<string, []>("op_11452_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11453_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1109_cast_fp16)[name = tensor<string, []>("op_11453_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11454_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1111_cast_fp16)[name = tensor<string, []>("op_11454_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11455_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1113_cast_fp16)[name = tensor<string, []>("op_11455_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11456_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1115_cast_fp16)[name = tensor<string, []>("op_11456_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11457_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1117_cast_fp16)[name = tensor<string, []>("op_11457_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11458_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1119_cast_fp16)[name = tensor<string, []>("op_11458_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11459_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1121_cast_fp16)[name = tensor<string, []>("op_11459_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11460_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1123_cast_fp16)[name = tensor<string, []>("op_11460_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11461_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1125_cast_fp16)[name = tensor<string, []>("op_11461_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11462_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1127_cast_fp16)[name = tensor<string, []>("op_11462_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11463_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1129_cast_fp16)[name = tensor<string, []>("op_11463_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11464_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1131_cast_fp16)[name = tensor<string, []>("op_11464_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11465_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1133_cast_fp16)[name = tensor<string, []>("op_11465_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11466_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1135_cast_fp16)[name = tensor<string, []>("op_11466_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11467_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1137_cast_fp16)[name = tensor<string, []>("op_11467_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11468_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1139_cast_fp16)[name = tensor<string, []>("op_11468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11469_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1141_cast_fp16)[name = tensor<string, []>("op_11469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11470_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1143_cast_fp16)[name = tensor<string, []>("op_11470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11471_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1145_cast_fp16)[name = tensor<string, []>("op_11471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11472_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1147_cast_fp16)[name = tensor<string, []>("op_11472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11473_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_1149_cast_fp16)[name = tensor<string, []>("op_11473_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_11474_cast_fp16 = softmax(axis = var_10700, x = aw_chunk_cast_fp16)[name = tensor<string, []>("op_11474_cast_fp16")];
+            tensor<string, []> var_11476_equation_0 = const()[name = tensor<string, []>("op_11476_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11476_cast_fp16 = einsum(equation = var_11476_equation_0, values = (var_11188_cast_fp16, var_11427_cast_fp16))[name = tensor<string, []>("op_11476_cast_fp16")];
+            tensor<string, []> var_11478_equation_0 = const()[name = tensor<string, []>("op_11478_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11478_cast_fp16 = einsum(equation = var_11478_equation_0, values = (var_11188_cast_fp16, var_11428_cast_fp16))[name = tensor<string, []>("op_11478_cast_fp16")];
+            tensor<string, []> var_11480_equation_0 = const()[name = tensor<string, []>("op_11480_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11480_cast_fp16 = einsum(equation = var_11480_equation_0, values = (var_11188_cast_fp16, var_11429_cast_fp16))[name = tensor<string, []>("op_11480_cast_fp16")];
+            tensor<string, []> var_11482_equation_0 = const()[name = tensor<string, []>("op_11482_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11482_cast_fp16 = einsum(equation = var_11482_equation_0, values = (var_11188_cast_fp16, var_11430_cast_fp16))[name = tensor<string, []>("op_11482_cast_fp16")];
+            tensor<string, []> var_11484_equation_0 = const()[name = tensor<string, []>("op_11484_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11484_cast_fp16 = einsum(equation = var_11484_equation_0, values = (var_11192_cast_fp16, var_11431_cast_fp16))[name = tensor<string, []>("op_11484_cast_fp16")];
+            tensor<string, []> var_11486_equation_0 = const()[name = tensor<string, []>("op_11486_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11486_cast_fp16 = einsum(equation = var_11486_equation_0, values = (var_11192_cast_fp16, var_11432_cast_fp16))[name = tensor<string, []>("op_11486_cast_fp16")];
+            tensor<string, []> var_11488_equation_0 = const()[name = tensor<string, []>("op_11488_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11488_cast_fp16 = einsum(equation = var_11488_equation_0, values = (var_11192_cast_fp16, var_11433_cast_fp16))[name = tensor<string, []>("op_11488_cast_fp16")];
+            tensor<string, []> var_11490_equation_0 = const()[name = tensor<string, []>("op_11490_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11490_cast_fp16 = einsum(equation = var_11490_equation_0, values = (var_11192_cast_fp16, var_11434_cast_fp16))[name = tensor<string, []>("op_11490_cast_fp16")];
+            tensor<string, []> var_11492_equation_0 = const()[name = tensor<string, []>("op_11492_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11492_cast_fp16 = einsum(equation = var_11492_equation_0, values = (var_11196_cast_fp16, var_11435_cast_fp16))[name = tensor<string, []>("op_11492_cast_fp16")];
+            tensor<string, []> var_11494_equation_0 = const()[name = tensor<string, []>("op_11494_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11494_cast_fp16 = einsum(equation = var_11494_equation_0, values = (var_11196_cast_fp16, var_11436_cast_fp16))[name = tensor<string, []>("op_11494_cast_fp16")];
+            tensor<string, []> var_11496_equation_0 = const()[name = tensor<string, []>("op_11496_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11496_cast_fp16 = einsum(equation = var_11496_equation_0, values = (var_11196_cast_fp16, var_11437_cast_fp16))[name = tensor<string, []>("op_11496_cast_fp16")];
+            tensor<string, []> var_11498_equation_0 = const()[name = tensor<string, []>("op_11498_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11498_cast_fp16 = einsum(equation = var_11498_equation_0, values = (var_11196_cast_fp16, var_11438_cast_fp16))[name = tensor<string, []>("op_11498_cast_fp16")];
+            tensor<string, []> var_11500_equation_0 = const()[name = tensor<string, []>("op_11500_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11500_cast_fp16 = einsum(equation = var_11500_equation_0, values = (var_11200_cast_fp16, var_11439_cast_fp16))[name = tensor<string, []>("op_11500_cast_fp16")];
+            tensor<string, []> var_11502_equation_0 = const()[name = tensor<string, []>("op_11502_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11502_cast_fp16 = einsum(equation = var_11502_equation_0, values = (var_11200_cast_fp16, var_11440_cast_fp16))[name = tensor<string, []>("op_11502_cast_fp16")];
+            tensor<string, []> var_11504_equation_0 = const()[name = tensor<string, []>("op_11504_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11504_cast_fp16 = einsum(equation = var_11504_equation_0, values = (var_11200_cast_fp16, var_11441_cast_fp16))[name = tensor<string, []>("op_11504_cast_fp16")];
+            tensor<string, []> var_11506_equation_0 = const()[name = tensor<string, []>("op_11506_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11506_cast_fp16 = einsum(equation = var_11506_equation_0, values = (var_11200_cast_fp16, var_11442_cast_fp16))[name = tensor<string, []>("op_11506_cast_fp16")];
+            tensor<string, []> var_11508_equation_0 = const()[name = tensor<string, []>("op_11508_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11508_cast_fp16 = einsum(equation = var_11508_equation_0, values = (var_11204_cast_fp16, var_11443_cast_fp16))[name = tensor<string, []>("op_11508_cast_fp16")];
+            tensor<string, []> var_11510_equation_0 = const()[name = tensor<string, []>("op_11510_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11510_cast_fp16 = einsum(equation = var_11510_equation_0, values = (var_11204_cast_fp16, var_11444_cast_fp16))[name = tensor<string, []>("op_11510_cast_fp16")];
+            tensor<string, []> var_11512_equation_0 = const()[name = tensor<string, []>("op_11512_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11512_cast_fp16 = einsum(equation = var_11512_equation_0, values = (var_11204_cast_fp16, var_11445_cast_fp16))[name = tensor<string, []>("op_11512_cast_fp16")];
+            tensor<string, []> var_11514_equation_0 = const()[name = tensor<string, []>("op_11514_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11514_cast_fp16 = einsum(equation = var_11514_equation_0, values = (var_11204_cast_fp16, var_11446_cast_fp16))[name = tensor<string, []>("op_11514_cast_fp16")];
+            tensor<string, []> var_11516_equation_0 = const()[name = tensor<string, []>("op_11516_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11516_cast_fp16 = einsum(equation = var_11516_equation_0, values = (var_11208_cast_fp16, var_11447_cast_fp16))[name = tensor<string, []>("op_11516_cast_fp16")];
+            tensor<string, []> var_11518_equation_0 = const()[name = tensor<string, []>("op_11518_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11518_cast_fp16 = einsum(equation = var_11518_equation_0, values = (var_11208_cast_fp16, var_11448_cast_fp16))[name = tensor<string, []>("op_11518_cast_fp16")];
+            tensor<string, []> var_11520_equation_0 = const()[name = tensor<string, []>("op_11520_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11520_cast_fp16 = einsum(equation = var_11520_equation_0, values = (var_11208_cast_fp16, var_11449_cast_fp16))[name = tensor<string, []>("op_11520_cast_fp16")];
+            tensor<string, []> var_11522_equation_0 = const()[name = tensor<string, []>("op_11522_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11522_cast_fp16 = einsum(equation = var_11522_equation_0, values = (var_11208_cast_fp16, var_11450_cast_fp16))[name = tensor<string, []>("op_11522_cast_fp16")];
+            tensor<string, []> var_11524_equation_0 = const()[name = tensor<string, []>("op_11524_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11524_cast_fp16 = einsum(equation = var_11524_equation_0, values = (var_11212_cast_fp16, var_11451_cast_fp16))[name = tensor<string, []>("op_11524_cast_fp16")];
+            tensor<string, []> var_11526_equation_0 = const()[name = tensor<string, []>("op_11526_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11526_cast_fp16 = einsum(equation = var_11526_equation_0, values = (var_11212_cast_fp16, var_11452_cast_fp16))[name = tensor<string, []>("op_11526_cast_fp16")];
+            tensor<string, []> var_11528_equation_0 = const()[name = tensor<string, []>("op_11528_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11528_cast_fp16 = einsum(equation = var_11528_equation_0, values = (var_11212_cast_fp16, var_11453_cast_fp16))[name = tensor<string, []>("op_11528_cast_fp16")];
+            tensor<string, []> var_11530_equation_0 = const()[name = tensor<string, []>("op_11530_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11530_cast_fp16 = einsum(equation = var_11530_equation_0, values = (var_11212_cast_fp16, var_11454_cast_fp16))[name = tensor<string, []>("op_11530_cast_fp16")];
+            tensor<string, []> var_11532_equation_0 = const()[name = tensor<string, []>("op_11532_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11532_cast_fp16 = einsum(equation = var_11532_equation_0, values = (var_11216_cast_fp16, var_11455_cast_fp16))[name = tensor<string, []>("op_11532_cast_fp16")];
+            tensor<string, []> var_11534_equation_0 = const()[name = tensor<string, []>("op_11534_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11534_cast_fp16 = einsum(equation = var_11534_equation_0, values = (var_11216_cast_fp16, var_11456_cast_fp16))[name = tensor<string, []>("op_11534_cast_fp16")];
+            tensor<string, []> var_11536_equation_0 = const()[name = tensor<string, []>("op_11536_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11536_cast_fp16 = einsum(equation = var_11536_equation_0, values = (var_11216_cast_fp16, var_11457_cast_fp16))[name = tensor<string, []>("op_11536_cast_fp16")];
+            tensor<string, []> var_11538_equation_0 = const()[name = tensor<string, []>("op_11538_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11538_cast_fp16 = einsum(equation = var_11538_equation_0, values = (var_11216_cast_fp16, var_11458_cast_fp16))[name = tensor<string, []>("op_11538_cast_fp16")];
+            tensor<string, []> var_11540_equation_0 = const()[name = tensor<string, []>("op_11540_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11540_cast_fp16 = einsum(equation = var_11540_equation_0, values = (var_11220_cast_fp16, var_11459_cast_fp16))[name = tensor<string, []>("op_11540_cast_fp16")];
+            tensor<string, []> var_11542_equation_0 = const()[name = tensor<string, []>("op_11542_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11542_cast_fp16 = einsum(equation = var_11542_equation_0, values = (var_11220_cast_fp16, var_11460_cast_fp16))[name = tensor<string, []>("op_11542_cast_fp16")];
+            tensor<string, []> var_11544_equation_0 = const()[name = tensor<string, []>("op_11544_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11544_cast_fp16 = einsum(equation = var_11544_equation_0, values = (var_11220_cast_fp16, var_11461_cast_fp16))[name = tensor<string, []>("op_11544_cast_fp16")];
+            tensor<string, []> var_11546_equation_0 = const()[name = tensor<string, []>("op_11546_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11546_cast_fp16 = einsum(equation = var_11546_equation_0, values = (var_11220_cast_fp16, var_11462_cast_fp16))[name = tensor<string, []>("op_11546_cast_fp16")];
+            tensor<string, []> var_11548_equation_0 = const()[name = tensor<string, []>("op_11548_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11548_cast_fp16 = einsum(equation = var_11548_equation_0, values = (var_11224_cast_fp16, var_11463_cast_fp16))[name = tensor<string, []>("op_11548_cast_fp16")];
+            tensor<string, []> var_11550_equation_0 = const()[name = tensor<string, []>("op_11550_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11550_cast_fp16 = einsum(equation = var_11550_equation_0, values = (var_11224_cast_fp16, var_11464_cast_fp16))[name = tensor<string, []>("op_11550_cast_fp16")];
+            tensor<string, []> var_11552_equation_0 = const()[name = tensor<string, []>("op_11552_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11552_cast_fp16 = einsum(equation = var_11552_equation_0, values = (var_11224_cast_fp16, var_11465_cast_fp16))[name = tensor<string, []>("op_11552_cast_fp16")];
+            tensor<string, []> var_11554_equation_0 = const()[name = tensor<string, []>("op_11554_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11554_cast_fp16 = einsum(equation = var_11554_equation_0, values = (var_11224_cast_fp16, var_11466_cast_fp16))[name = tensor<string, []>("op_11554_cast_fp16")];
+            tensor<string, []> var_11556_equation_0 = const()[name = tensor<string, []>("op_11556_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11556_cast_fp16 = einsum(equation = var_11556_equation_0, values = (var_11228_cast_fp16, var_11467_cast_fp16))[name = tensor<string, []>("op_11556_cast_fp16")];
+            tensor<string, []> var_11558_equation_0 = const()[name = tensor<string, []>("op_11558_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11558_cast_fp16 = einsum(equation = var_11558_equation_0, values = (var_11228_cast_fp16, var_11468_cast_fp16))[name = tensor<string, []>("op_11558_cast_fp16")];
+            tensor<string, []> var_11560_equation_0 = const()[name = tensor<string, []>("op_11560_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11560_cast_fp16 = einsum(equation = var_11560_equation_0, values = (var_11228_cast_fp16, var_11469_cast_fp16))[name = tensor<string, []>("op_11560_cast_fp16")];
+            tensor<string, []> var_11562_equation_0 = const()[name = tensor<string, []>("op_11562_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11562_cast_fp16 = einsum(equation = var_11562_equation_0, values = (var_11228_cast_fp16, var_11470_cast_fp16))[name = tensor<string, []>("op_11562_cast_fp16")];
+            tensor<string, []> var_11564_equation_0 = const()[name = tensor<string, []>("op_11564_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11564_cast_fp16 = einsum(equation = var_11564_equation_0, values = (var_11232_cast_fp16, var_11471_cast_fp16))[name = tensor<string, []>("op_11564_cast_fp16")];
+            tensor<string, []> var_11566_equation_0 = const()[name = tensor<string, []>("op_11566_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11566_cast_fp16 = einsum(equation = var_11566_equation_0, values = (var_11232_cast_fp16, var_11472_cast_fp16))[name = tensor<string, []>("op_11566_cast_fp16")];
+            tensor<string, []> var_11568_equation_0 = const()[name = tensor<string, []>("op_11568_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11568_cast_fp16 = einsum(equation = var_11568_equation_0, values = (var_11232_cast_fp16, var_11473_cast_fp16))[name = tensor<string, []>("op_11568_cast_fp16")];
+            tensor<string, []> var_11570_equation_0 = const()[name = tensor<string, []>("op_11570_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_11570_cast_fp16 = einsum(equation = var_11570_equation_0, values = (var_11232_cast_fp16, var_11474_cast_fp16))[name = tensor<string, []>("op_11570_cast_fp16")];
+            tensor<bool, []> var_11572_interleave_0 = const()[name = tensor<string, []>("op_11572_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11572_cast_fp16 = concat(axis = var_10683, interleave = var_11572_interleave_0, values = (var_11476_cast_fp16, var_11478_cast_fp16, var_11480_cast_fp16, var_11482_cast_fp16))[name = tensor<string, []>("op_11572_cast_fp16")];
+            tensor<bool, []> var_11574_interleave_0 = const()[name = tensor<string, []>("op_11574_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11574_cast_fp16 = concat(axis = var_10683, interleave = var_11574_interleave_0, values = (var_11484_cast_fp16, var_11486_cast_fp16, var_11488_cast_fp16, var_11490_cast_fp16))[name = tensor<string, []>("op_11574_cast_fp16")];
+            tensor<bool, []> var_11576_interleave_0 = const()[name = tensor<string, []>("op_11576_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11576_cast_fp16 = concat(axis = var_10683, interleave = var_11576_interleave_0, values = (var_11492_cast_fp16, var_11494_cast_fp16, var_11496_cast_fp16, var_11498_cast_fp16))[name = tensor<string, []>("op_11576_cast_fp16")];
+            tensor<bool, []> var_11578_interleave_0 = const()[name = tensor<string, []>("op_11578_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11578_cast_fp16 = concat(axis = var_10683, interleave = var_11578_interleave_0, values = (var_11500_cast_fp16, var_11502_cast_fp16, var_11504_cast_fp16, var_11506_cast_fp16))[name = tensor<string, []>("op_11578_cast_fp16")];
+            tensor<bool, []> var_11580_interleave_0 = const()[name = tensor<string, []>("op_11580_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11580_cast_fp16 = concat(axis = var_10683, interleave = var_11580_interleave_0, values = (var_11508_cast_fp16, var_11510_cast_fp16, var_11512_cast_fp16, var_11514_cast_fp16))[name = tensor<string, []>("op_11580_cast_fp16")];
+            tensor<bool, []> var_11582_interleave_0 = const()[name = tensor<string, []>("op_11582_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11582_cast_fp16 = concat(axis = var_10683, interleave = var_11582_interleave_0, values = (var_11516_cast_fp16, var_11518_cast_fp16, var_11520_cast_fp16, var_11522_cast_fp16))[name = tensor<string, []>("op_11582_cast_fp16")];
+            tensor<bool, []> var_11584_interleave_0 = const()[name = tensor<string, []>("op_11584_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11584_cast_fp16 = concat(axis = var_10683, interleave = var_11584_interleave_0, values = (var_11524_cast_fp16, var_11526_cast_fp16, var_11528_cast_fp16, var_11530_cast_fp16))[name = tensor<string, []>("op_11584_cast_fp16")];
+            tensor<bool, []> var_11586_interleave_0 = const()[name = tensor<string, []>("op_11586_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11586_cast_fp16 = concat(axis = var_10683, interleave = var_11586_interleave_0, values = (var_11532_cast_fp16, var_11534_cast_fp16, var_11536_cast_fp16, var_11538_cast_fp16))[name = tensor<string, []>("op_11586_cast_fp16")];
+            tensor<bool, []> var_11588_interleave_0 = const()[name = tensor<string, []>("op_11588_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11588_cast_fp16 = concat(axis = var_10683, interleave = var_11588_interleave_0, values = (var_11540_cast_fp16, var_11542_cast_fp16, var_11544_cast_fp16, var_11546_cast_fp16))[name = tensor<string, []>("op_11588_cast_fp16")];
+            tensor<bool, []> var_11590_interleave_0 = const()[name = tensor<string, []>("op_11590_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11590_cast_fp16 = concat(axis = var_10683, interleave = var_11590_interleave_0, values = (var_11548_cast_fp16, var_11550_cast_fp16, var_11552_cast_fp16, var_11554_cast_fp16))[name = tensor<string, []>("op_11590_cast_fp16")];
+            tensor<bool, []> var_11592_interleave_0 = const()[name = tensor<string, []>("op_11592_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11592_cast_fp16 = concat(axis = var_10683, interleave = var_11592_interleave_0, values = (var_11556_cast_fp16, var_11558_cast_fp16, var_11560_cast_fp16, var_11562_cast_fp16))[name = tensor<string, []>("op_11592_cast_fp16")];
+            tensor<bool, []> var_11594_interleave_0 = const()[name = tensor<string, []>("op_11594_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_11594_cast_fp16 = concat(axis = var_10683, interleave = var_11594_interleave_0, values = (var_11564_cast_fp16, var_11566_cast_fp16, var_11568_cast_fp16, var_11570_cast_fp16))[name = tensor<string, []>("op_11594_cast_fp16")];
+            tensor<bool, []> input_89_interleave_0 = const()[name = tensor<string, []>("input_89_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_89_cast_fp16 = concat(axis = var_10700, interleave = input_89_interleave_0, values = (var_11572_cast_fp16, var_11574_cast_fp16, var_11576_cast_fp16, var_11578_cast_fp16, var_11580_cast_fp16, var_11582_cast_fp16, var_11584_cast_fp16, var_11586_cast_fp16, var_11588_cast_fp16, var_11590_cast_fp16, var_11592_cast_fp16, var_11594_cast_fp16))[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<int32, [2]> var_11599 = const()[name = tensor<string, []>("op_11599"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_11601 = const()[name = tensor<string, []>("op_11601"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_pad_type_0 = const()[name = tensor<string, []>("obj_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_pad_0 = const()[name = tensor<string, []>("obj_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165690624)))];
+            tensor<fp16, [768]> layers_11_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166870336)))];
+            tensor<fp16, [1, 768, 1, 1500]> obj_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_bias_to_fp16, dilations = var_11601, groups = var_10700, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = var_11599, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_89_cast_fp16)[name = tensor<string, []>("obj_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> var_11607 = const()[name = tensor<string, []>("op_11607"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_47_cast_fp16 = reduce_mean(axes = var_11607, keep_dims = var_10701, x = inputs_47_cast_fp16)[name = tensor<string, []>("channels_mean_47_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_47_cast_fp16 = sub(x = inputs_47_cast_fp16, y = channels_mean_47_cast_fp16)[name = tensor<string, []>("zero_mean_47_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = zero_mean_47_cast_fp16)[name = tensor<string, []>("zero_mean_sq_47_cast_fp16")];
+            tensor<int32, [1]> var_11611 = const()[name = tensor<string, []>("op_11611"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_11612_cast_fp16 = reduce_mean(axes = var_11611, keep_dims = var_10701, x = zero_mean_sq_47_cast_fp16)[name = tensor<string, []>("op_11612_cast_fp16")];
+            tensor<fp16, []> var_11613_to_fp16 = const()[name = tensor<string, []>("op_11613_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_11614_cast_fp16 = add(x = var_11612_cast_fp16, y = var_11613_to_fp16)[name = tensor<string, []>("op_11614_cast_fp16")];
+            tensor<fp16, []> denom_47_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_47_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0_to_fp16, x = var_11614_cast_fp16)[name = tensor<string, []>("denom_47_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = denom_47_cast_fp16)[name = tensor<string, []>("out_47_cast_fp16")];
+            tensor<fp16, [768]> input_91_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_91_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166871936)))];
+            tensor<fp16, [768]> input_91_beta_0_to_fp16 = const()[name = tensor<string, []>("input_91_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166873536)))];
+            tensor<fp16, []> input_91_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_91_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<int32, [2]> var_11625 = const()[name = tensor<string, []>("op_11625"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_11627 = const()[name = tensor<string, []>("op_11627"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_93_pad_type_0 = const()[name = tensor<string, []>("input_93_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_93_pad_0 = const()[name = tensor<string, []>("input_93_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166875136)))];
+            tensor<fp16, [3072]> layers_11_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171593792)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_93_cast_fp16 = conv(bias = layers_11_fc1_bias_to_fp16, dilations = var_11627, groups = var_10700, pad = input_93_pad_0, pad_type = input_93_pad_type_0, strides = var_11625, weight = layers_11_fc1_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_93_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [2]> var_11633 = const()[name = tensor<string, []>("op_11633"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_11635 = const()[name = tensor<string, []>("op_11635"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_pad_type_0 = const()[name = tensor<string, []>("hidden_states_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_pad_0 = const()[name = tensor<string, []>("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171600000)))];
+            tensor<fp16, [768]> layers_11_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176318656)))];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_cast_fp16 = conv(bias = layers_11_fc2_bias_to_fp16, dilations = var_11635, groups = var_10700, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = var_11633, weight = layers_11_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<bool, []> var_11641 = const()[name = tensor<string, []>("op_11641"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_11645 = const()[name = tensor<string, []>("op_11645"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_cast_fp16 = reduce_mean(axes = var_11645, keep_dims = var_11641, x = inputs_cast_fp16)[name = tensor<string, []>("channels_mean_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor<string, []>("zero_mean_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor<string, []>("zero_mean_sq_cast_fp16")];
+            tensor<int32, [1]> var_11649 = const()[name = tensor<string, []>("op_11649"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_11650_cast_fp16 = reduce_mean(axes = var_11649, keep_dims = var_11641, x = zero_mean_sq_cast_fp16)[name = tensor<string, []>("op_11650_cast_fp16")];
+            tensor<fp16, []> var_11651_to_fp16 = const()[name = tensor<string, []>("op_11651_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_11652_cast_fp16 = add(x = var_11650_cast_fp16, y = var_11651_to_fp16)[name = tensor<string, []>("op_11652_cast_fp16")];
+            tensor<fp16, []> denom_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_11652_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
+            tensor<fp16, [768]> encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176320256)))];
+            tensor<fp16, [768]> encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176321856)))];
+            tensor<fp16, []> encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("encoder_output_embeds_type_fp32_cast_fp16")];
+        } -> (encoder_output_embeds);
+}
\ No newline at end of file
diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/model.mlmodel b/openai_whisper-small/AudioEncoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..b6314fec31f6cf5901665aba75ae05333313cc2c
--- /dev/null
+++ b/openai_whisper-small/AudioEncoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68ca04660b8b050c68ca54c27d97c47e4133bc591422cb7009de8922d56fb8c9
+size 155271
diff --git a/openai_whisper-small/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-small/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3cea13e14c52f389d44ebbeaef758c35719d9f32
--- /dev/null
+++ b/openai_whisper-small/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe35cef2c9406993a635639b16f373f6debb0215ac115b7bf93fa03c8e10310b
+size 176323456
diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-small/MelSpectrogram.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8cce829d60c4056be93333a562d47d3bb2908b9b
--- /dev/null
+++ b/openai_whisper-small/MelSpectrogram.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f77e6457285248f99cd7aa3fd4cc2efbb17733e63e7023ac53abe1f95785d07
+size 243
diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-small/MelSpectrogram.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b1d9e7a102f740c68cdfc7272dc5b8007c48416a
--- /dev/null
+++ b/openai_whisper-small/MelSpectrogram.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dabdc5aa69f6ef4d97dc9499f5c30514e00e96b53b750b33a5a6471363c71662
+size 328
diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-small/MelSpectrogram.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..022c382ec7041de63c59dd98893c3134d01b23aa
--- /dev/null
+++ b/openai_whisper-small/MelSpectrogram.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Pad" : 1,
+      "Ios16.mul" : 2,
+      "SliceByIndex" : 1,
+      "Ios16.sub" : 1,
+      "Ios16.log" : 1,
+      "Ios16.conv" : 2,
+      "Ios16.add" : 3,
+      "Ios16.square" : 2,
+      "Ios16.matmul" : 1,
+      "Squeeze" : 2,
+      "Ios16.maximum" : 1,
+      "ExpandDims" : 4,
+      "Ios16.reduceMax" : 1,
+      "Identity" : 1,
+      "Ios16.reshape" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.1",
+      "com.github.apple.coremltools.version" : "7.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "audio",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "MelSpectrogram",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-small/MelSpectrogram.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..a63d7fa99d6d86db1b76a1f53640cb4aa25e0210
--- /dev/null
+++ b/openai_whisper-small/MelSpectrogram.mlmodelc/model.mil
@@ -0,0 +1,66 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})]
+{
+    func main<ios16>(tensor<fp16, [480000]> audio) {
+            tensor<int32, [3]> var_10 = const()[name = tensor<string, []>("op_10"), val = tensor<int32, [3]>([1, 1, 480000])];
+            tensor<fp16, [1, 1, 480000]> input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<int32, [6]> input_3_pad_0 = const()[name = tensor<string, []>("input_3_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 200, 200])];
+            tensor<string, []> input_3_mode_0 = const()[name = tensor<string, []>("input_3_mode_0"), val = tensor<string, []>("reflect")];
+            tensor<fp16, []> input_3_constant_val_0_to_fp16 = const()[name = tensor<string, []>("input_3_constant_val_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
+            tensor<fp16, [1, 1, 480400]> input_3_cast_fp16 = pad(constant_val = input_3_constant_val_0_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<int32, [1]> var_22 = const()[name = tensor<string, []>("op_22"), val = tensor<int32, [1]>([480400])];
+            tensor<fp16, [480400]> input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = tensor<string, []>("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 480400]> expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = tensor<string, []>("expand_dims_0_cast_fp16")];
+            tensor<int32, [1]> expand_dims_3 = const()[name = tensor<string, []>("expand_dims_3"), val = tensor<int32, [1]>([160])];
+            tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = tensor<string, []>("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 480400]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = tensor<string, []>("expand_dims_4_cast_fp16")];
+            tensor<string, []> conv_0_pad_type_0 = const()[name = tensor<string, []>("conv_0_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> conv_0_pad_0 = const()[name = tensor<string, []>("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_0_dilations_0 = const()[name = tensor<string, []>("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> conv_0_groups_0 = const()[name = tensor<string, []>("conv_0_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_1_to_fp16 = const()[name = tensor<string, []>("expand_dims_1_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1, 201, 3001]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_0_cast_fp16")];
+            tensor<string, []> conv_1_pad_type_0 = const()[name = tensor<string, []>("conv_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> conv_1_pad_0 = const()[name = tensor<string, []>("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_1_dilations_0 = const()[name = tensor<string, []>("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> conv_1_groups_0 = const()[name = tensor<string, []>("conv_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_2_to_fp16 = const()[name = tensor<string, []>("expand_dims_2_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160960)))];
+            tensor<fp16, [1, 201, 3001]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_1_cast_fp16")];
+            tensor<int32, [1]> squeeze_0_axes_0 = const()[name = tensor<string, []>("squeeze_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = tensor<string, []>("squeeze_0_cast_fp16")];
+            tensor<int32, [1]> squeeze_1_axes_0 = const()[name = tensor<string, []>("squeeze_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = tensor<string, []>("squeeze_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = tensor<string, []>("square_0_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = tensor<string, []>("square_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = tensor<string, []>("add_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = tensor<string, []>("magnitudes_1_cast_fp16")];
+            tensor<int32, [2]> magnitudes_begin_0 = const()[name = tensor<string, []>("magnitudes_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [2]> magnitudes_end_0 = const()[name = tensor<string, []>("magnitudes_end_0"), val = tensor<int32, [2]>([201, 3000])];
+            tensor<bool, [2]> magnitudes_end_mask_0 = const()[name = tensor<string, []>("magnitudes_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [201, 3000]> magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = tensor<string, []>("magnitudes_cast_fp16")];
+            tensor<bool, []> mel_spec_1_transpose_x_0 = const()[name = tensor<string, []>("mel_spec_1_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> mel_spec_1_transpose_y_0 = const()[name = tensor<string, []>("mel_spec_1_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [80, 201]> mel_filters_to_fp16 = const()[name = tensor<string, []>("mel_filters_to_fp16"), val = tensor<fp16, [80, 201]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(321856)))];
+            tensor<fp16, [80, 3000]> mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = tensor<string, []>("mel_spec_1_cast_fp16")];
+            tensor<fp16, []> var_41_to_fp16 = const()[name = tensor<string, []>("op_41_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [80, 3000]> mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = tensor<string, []>("mel_spec_cast_fp16")];
+            tensor<fp16, []> log_0_epsilon_0_to_fp16 = const()[name = tensor<string, []>("log_0_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
+            tensor<fp16, [80, 3000]> log_0_cast_fp16 = log(epsilon = log_0_epsilon_0_to_fp16, x = mel_spec_cast_fp16)[name = tensor<string, []>("log_0_cast_fp16")];
+            tensor<fp16, []> mul_0_y_0_to_fp16 = const()[name = tensor<string, []>("mul_0_y_0_to_fp16"), val = tensor<fp16, []>(0x1.bccp-2)];
+            tensor<fp16, [80, 3000]> mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = tensor<string, []>("mul_0_cast_fp16")];
+            tensor<bool, []> var_44_keep_dims_0 = const()[name = tensor<string, []>("op_44_keep_dims_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, []> var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
+            tensor<fp16, []> var_46_to_fp16 = const()[name = tensor<string, []>("op_46_to_fp16"), val = tensor<fp16, []>(0x1p+3)];
+            tensor<fp16, []> var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = tensor<string, []>("op_47_cast_fp16")];
+            tensor<fp16, [80, 3000]> log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = tensor<string, []>("log_spec_3_cast_fp16")];
+            tensor<fp16, []> var_50_to_fp16 = const()[name = tensor<string, []>("op_50_to_fp16"), val = tensor<fp16, []>(0x1p+2)];
+            tensor<fp16, [80, 3000]> var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = tensor<string, []>("op_51_cast_fp16")];
+            tensor<fp16, []> _inversed_log_spec_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_log_spec_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-2)];
+            tensor<fp16, [80, 3000]> _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = tensor<string, []>("_inversed_log_spec_cast_fp16")];
+            tensor<int32, [1]> var_55_axes_0 = const()[name = tensor<string, []>("op_55_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 80, 3000]> var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = tensor<string, []>("op_55_cast_fp16")];
+            tensor<int32, [1]> var_62_axes_0 = const()[name = tensor<string, []>("op_62_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
+        } -> (melspectrogram_features);
+}
\ No newline at end of file
diff --git a/openai_whisper-small/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-small/MelSpectrogram.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6110c0f1e30e3ddad047c471f30fb114a2e5562e
--- /dev/null
+++ b/openai_whisper-small/MelSpectrogram.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:267017e533b5f542d195fd9a775f2ba649075128283ce8e86c63a2ec20de5b07
+size 354080
diff --git a/openai_whisper-small/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-small/TextDecoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fcd839bbf91242e087302939502d648dd193dfe8
--- /dev/null
+++ b/openai_whisper-small/TextDecoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39c0d6d55353bc61ef8071081bb958dd1ab7b0b7f2a3338a797f1a64211e084c
+size 243
diff --git a/openai_whisper-small/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-small/TextDecoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..adf3d2eb117cb6a142a8021e6c0b9fdcad7b1518
--- /dev/null
+++ b/openai_whisper-small/TextDecoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2ccd0b8920701386ab9554f7db47b43e55ee07863280ee5d829d5272839adc2
+size 633
diff --git a/openai_whisper-small/TextDecoder.mlmodelc/metadata.json b/openai_whisper-small/TextDecoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..6cdc64a42b740095de45e0b4e49e261bea73558a
--- /dev/null
+++ b/openai_whisper-small/TextDecoder.mlmodelc/metadata.json
@@ -0,0 +1,165 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 51865)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 51865]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 9216, 1, 1]",
+        "name" : "key_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 9216, 1, 1]",
+        "name" : "value_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500]",
+        "name" : "alignment_heads_weights",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Split" : 2,
+      "Concat" : 3,
+      "Ios16.rsqrt" : 37,
+      "Ios16.mul" : 146,
+      "Squeeze" : 1,
+      "SliceByIndex" : 20,
+      "Ios16.sub" : 38,
+      "Transpose" : 1,
+      "Ios16.conv" : 120,
+      "Ios16.add" : 110,
+      "Ios16.linear" : 1,
+      "Ios16.matmul" : 48,
+      "Ios16.gelu" : 12,
+      "Ios16.reduceMean" : 75,
+      "ExpandDims" : 6,
+      "Ios16.batchNorm" : 37,
+      "Ios16.gather" : 2,
+      "Ios16.reshape" : 96,
+      "Ios16.softmax" : 24
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.1",
+      "com.github.apple.coremltools.version" : "7.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "cache_length",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 224)",
+        "shortDescription" : "",
+        "shape" : "[1, 9216, 1, 224]",
+        "name" : "key_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 224)",
+        "shortDescription" : "",
+        "shape" : "[1, 9216, 1, 224]",
+        "name" : "value_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 224)",
+        "shortDescription" : "",
+        "shape" : "[1, 224]",
+        "name" : "kv_cache_update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 768 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 768, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 224)",
+        "shortDescription" : "",
+        "shape" : "[1, 224]",
+        "name" : "decoder_key_padding_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "TextDecoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small/TextDecoder.mlmodelc/model.mil b/openai_whisper-small/TextDecoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..dca5e408299638f61530c44b7ed4442b8e3b646b
--- /dev/null
+++ b/openai_whisper-small/TextDecoder.mlmodelc/model.mil
@@ -0,0 +1,2105 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})]
+{
+    func main<ios16>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 224]> decoder_key_padding_mask, tensor<fp16, [1, 768, 1, 1500]> encoder_output_embeds, tensor<int32, [1]> input_ids, tensor<fp16, [1, 9216, 1, 224]> key_cache, tensor<fp16, [1, 224]> kv_cache_update_mask, tensor<fp16, [1, 9216, 1, 224]> value_cache) {
+            tensor<int32, []> var_40_axis_0 = const()[name = tensor<string, []>("op_40_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> var_40_batch_dims_0 = const()[name = tensor<string, []>("op_40_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [51865, 768]> embed_tokens_weight_to_fp16 = const()[name = tensor<string, []>("embed_tokens_weight_to_fp16"), val = tensor<fp16, [51865, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1, 768]> var_40_cast_fp16 = gather(axis = var_40_axis_0, batch_dims = var_40_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor<string, []>("op_40_cast_fp16")];
+            tensor<int32, []> var_44_axis_0 = const()[name = tensor<string, []>("op_44_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> var_44_batch_dims_0 = const()[name = tensor<string, []>("op_44_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [448, 768]> embed_positions_weight_to_fp16 = const()[name = tensor<string, []>("embed_positions_weight_to_fp16"), val = tensor<fp16, [448, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79664768)))];
+            tensor<fp16, [1, 768]> var_44_cast_fp16 = gather(axis = var_44_axis_0, batch_dims = var_44_batch_dims_0, indices = cache_length, x = embed_positions_weight_to_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
+            tensor<fp16, [1, 768]> hidden_states_1_cast_fp16 = add(x = var_40_cast_fp16, y = var_44_cast_fp16)[name = tensor<string, []>("hidden_states_1_cast_fp16")];
+            tensor<int32, [1]> var_58_axes_0 = const()[name = tensor<string, []>("op_58_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1]> var_58_cast_fp16 = expand_dims(axes = var_58_axes_0, x = hidden_states_1_cast_fp16)[name = tensor<string, []>("op_58_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([3])];
+            tensor<fp16, [1, 768, 1, 1]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_58_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, [12]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [12]>([768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768])];
+            tensor<int32, []> var_63_axis_0 = const()[name = tensor<string, []>("op_63_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_0, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_1, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_2, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_3, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_4, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_5, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_6, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_7, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_8, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_9, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_10, tensor<fp16, [1, 768, 1, 224]> var_63_cast_fp16_11 = split(axis = var_63_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor<string, []>("op_63_cast_fp16")];
+            tensor<int32, [12]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [12]>([768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768])];
+            tensor<int32, []> var_78_axis_0 = const()[name = tensor<string, []>("op_78_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_0, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_1, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_2, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_3, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_4, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_5, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_6, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_7, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_8, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_9, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_10, tensor<fp16, [1, 768, 1, 224]> var_78_cast_fp16_11 = split(axis = var_78_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor<string, []>("op_78_cast_fp16")];
+            tensor<int32, []> var_96 = const()[name = tensor<string, []>("op_96"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_103 = const()[name = tensor<string, []>("op_103"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_104 = const()[name = tensor<string, []>("op_104"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_116 = const()[name = tensor<string, []>("op_116"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_1_cast_fp16 = reduce_mean(axes = var_116, keep_dims = var_104, x = inputs_1_cast_fp16)[name = tensor<string, []>("channels_mean_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor<string, []>("zero_mean_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor<string, []>("zero_mean_sq_1_cast_fp16")];
+            tensor<int32, [1]> var_120 = const()[name = tensor<string, []>("op_120"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_121_cast_fp16 = reduce_mean(axes = var_120, keep_dims = var_104, x = zero_mean_sq_1_cast_fp16)[name = tensor<string, []>("op_121_cast_fp16")];
+            tensor<fp16, []> var_122_to_fp16 = const()[name = tensor<string, []>("op_122_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_123_cast_fp16 = add(x = var_121_cast_fp16, y = var_122_to_fp16)[name = tensor<string, []>("op_123_cast_fp16")];
+            tensor<fp16, []> denom_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, x = var_123_cast_fp16)[name = tensor<string, []>("denom_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
+            tensor<fp16, [768]> obj_1_mean_0_to_fp16 = const()[name = tensor<string, []>("obj_1_mean_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80352960)))];
+            tensor<fp16, [768]> obj_1_variance_0_to_fp16 = const()[name = tensor<string, []>("obj_1_variance_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80354560)))];
+            tensor<fp16, [768]> obj_1_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80356160)))];
+            tensor<fp16, [768]> obj_1_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_1_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80357760)))];
+            tensor<fp16, []> obj_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor<string, []>("obj_1_cast_fp16")];
+            tensor<int32, [2]> var_138 = const()[name = tensor<string, []>("op_138"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_140 = const()[name = tensor<string, []>("op_140"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_1_pad_type_0 = const()[name = tensor<string, []>("query_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_1_pad_0 = const()[name = tensor<string, []>("query_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80359360)))];
+            tensor<fp16, [768]> layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81539072)))];
+            tensor<fp16, [1, 768, 1, 1]> query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_140, groups = var_103, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_138, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("query_1_cast_fp16")];
+            tensor<int32, [2]> var_144 = const()[name = tensor<string, []>("op_144"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_146 = const()[name = tensor<string, []>("op_146"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_1_pad_type_0 = const()[name = tensor<string, []>("current_key_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_1_pad_0 = const()[name = tensor<string, []>("current_key_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81540672)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_1_cast_fp16 = conv(dilations = var_146, groups = var_103, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = var_144, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("current_key_1_cast_fp16")];
+            tensor<int32, [2]> var_151 = const()[name = tensor<string, []>("op_151"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_153 = const()[name = tensor<string, []>("op_153"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_1_pad_type_0 = const()[name = tensor<string, []>("current_value_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_1_pad_0 = const()[name = tensor<string, []>("current_value_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82720384)))];
+            tensor<fp16, [768]> layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83900096)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_153, groups = var_103, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = var_151, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("current_value_1_cast_fp16")];
+            tensor<int32, [1]> var_157_axes_0 = const()[name = tensor<string, []>("op_157_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 224]> var_157_cast_fp16 = expand_dims(axes = var_157_axes_0, x = kv_cache_update_mask)[name = tensor<string, []>("op_157_cast_fp16")];
+            tensor<int32, [1]> var_158_axes_0 = const()[name = tensor<string, []>("op_158_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 224]> var_158_cast_fp16 = expand_dims(axes = var_158_axes_0, x = var_157_cast_fp16)[name = tensor<string, []>("op_158_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_160_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_160_cast_fp16")];
+            tensor<fp16, []> var_97_to_fp16 = const()[name = tensor<string, []>("op_97_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
+            tensor<fp16, [1, 1, 1, 224]> var_161_cast_fp16 = sub(x = var_97_to_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_161_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_162_cast_fp16 = mul(x = var_63_cast_fp16_0, y = var_161_cast_fp16)[name = tensor<string, []>("op_162_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_1_cast_fp16 = add(x = var_160_cast_fp16, y = var_162_cast_fp16)[name = tensor<string, []>("key_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_164_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_164_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_166_cast_fp16 = mul(x = var_78_cast_fp16_0, y = var_161_cast_fp16)[name = tensor<string, []>("op_166_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_1_cast_fp16 = add(x = var_164_cast_fp16, y = var_166_cast_fp16)[name = tensor<string, []>("value_1_cast_fp16")];
+            tensor<int32, [4]> var_169 = const()[name = tensor<string, []>("op_169"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_170_cast_fp16 = reshape(shape = var_169, x = query_1_cast_fp16)[name = tensor<string, []>("op_170_cast_fp16")];
+            tensor<fp16, []> var_171_to_fp16 = const()[name = tensor<string, []>("op_171_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_172_cast_fp16 = mul(x = var_170_cast_fp16, y = var_171_to_fp16)[name = tensor<string, []>("op_172_cast_fp16")];
+            tensor<int32, [4]> var_173 = const()[name = tensor<string, []>("op_173"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_174_cast_fp16 = reshape(shape = var_173, x = key_1_cast_fp16)[name = tensor<string, []>("op_174_cast_fp16")];
+            tensor<bool, []> mh_w_1_transpose_x_0 = const()[name = tensor<string, []>("mh_w_1_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_1_transpose_y_0 = const()[name = tensor<string, []>("mh_w_1_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_172_cast_fp16, y = var_174_cast_fp16)[name = tensor<string, []>("mh_w_1_cast_fp16")];
+            tensor<int32, [1]> var_178_axes_0 = const()[name = tensor<string, []>("op_178_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 224]> var_178_cast_fp16 = expand_dims(axes = var_178_axes_0, x = decoder_key_padding_mask)[name = tensor<string, []>("op_178_cast_fp16")];
+            tensor<int32, [1]> var_179_axes_0 = const()[name = tensor<string, []>("op_179_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 224]> var_179_cast_fp16 = expand_dims(axes = var_179_axes_0, x = var_178_cast_fp16)[name = tensor<string, []>("op_179_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_182_cast_fp16 = softmax(axis = var_96, x = mh_w_3_cast_fp16)[name = tensor<string, []>("op_182_cast_fp16")];
+            tensor<int32, [4]> var_183 = const()[name = tensor<string, []>("op_183"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_184_cast_fp16 = reshape(shape = var_183, x = value_1_cast_fp16)[name = tensor<string, []>("op_184_cast_fp16")];
+            tensor<bool, []> attn_1_transpose_x_0 = const()[name = tensor<string, []>("attn_1_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_1_transpose_y_0 = const()[name = tensor<string, []>("attn_1_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_184_cast_fp16, y = var_182_cast_fp16)[name = tensor<string, []>("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_187 = const()[name = tensor<string, []>("op_187"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_1_cast_fp16 = reshape(shape = var_187, x = attn_1_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<int32, [2]> var_191 = const()[name = tensor<string, []>("op_191"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_193 = const()[name = tensor<string, []>("op_193"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_7_pad_type_0 = const()[name = tensor<string, []>("obj_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_7_pad_0 = const()[name = tensor<string, []>("obj_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83901696)))];
+            tensor<fp16, [768]> layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85081408)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_7_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_193, groups = var_103, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_191, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("obj_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> var_203 = const()[name = tensor<string, []>("op_203"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_3_cast_fp16 = reduce_mean(axes = var_203, keep_dims = var_104, x = inputs_3_cast_fp16)[name = tensor<string, []>("channels_mean_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor<string, []>("zero_mean_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor<string, []>("zero_mean_sq_3_cast_fp16")];
+            tensor<int32, [1]> var_207 = const()[name = tensor<string, []>("op_207"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_208_cast_fp16 = reduce_mean(axes = var_207, keep_dims = var_104, x = zero_mean_sq_3_cast_fp16)[name = tensor<string, []>("op_208_cast_fp16")];
+            tensor<fp16, []> var_209_to_fp16 = const()[name = tensor<string, []>("op_209_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_210_cast_fp16 = add(x = var_208_cast_fp16, y = var_209_to_fp16)[name = tensor<string, []>("op_210_cast_fp16")];
+            tensor<fp16, []> denom_3_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_3_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_210_cast_fp16)[name = tensor<string, []>("denom_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
+            tensor<fp16, [768]> obj_9_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85083008)))];
+            tensor<fp16, [768]> obj_9_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_9_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85084608)))];
+            tensor<fp16, []> obj_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor<string, []>("obj_9_cast_fp16")];
+            tensor<int32, [2]> var_225 = const()[name = tensor<string, []>("op_225"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_227 = const()[name = tensor<string, []>("op_227"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_3_pad_type_0 = const()[name = tensor<string, []>("query_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_3_pad_0 = const()[name = tensor<string, []>("query_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85086208)))];
+            tensor<fp16, [768]> layers_0_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86265920)))];
+            tensor<fp16, [1, 768, 1, 1]> query_3_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_bias_to_fp16, dilations = var_227, groups = var_103, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_225, weight = layers_0_encoder_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
+            tensor<int32, [2]> var_231 = const()[name = tensor<string, []>("op_231"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_233 = const()[name = tensor<string, []>("op_233"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_3_pad_type_0 = const()[name = tensor<string, []>("key_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_3_pad_0 = const()[name = tensor<string, []>("key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86267520)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_3_cast_fp16 = conv(dilations = var_233, groups = var_103, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_231, weight = layers_0_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_3_cast_fp16")];
+            tensor<int32, [2]> var_238 = const()[name = tensor<string, []>("op_238"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_240 = const()[name = tensor<string, []>("op_240"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_3_pad_type_0 = const()[name = tensor<string, []>("value_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_3_pad_0 = const()[name = tensor<string, []>("value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87447232)))];
+            tensor<fp16, [768]> layers_0_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88626944)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_3_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_bias_to_fp16, dilations = var_240, groups = var_103, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_238, weight = layers_0_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_3_cast_fp16")];
+            tensor<int32, [4]> var_244 = const()[name = tensor<string, []>("op_244"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_245_cast_fp16 = reshape(shape = var_244, x = query_3_cast_fp16)[name = tensor<string, []>("op_245_cast_fp16")];
+            tensor<fp16, []> var_246_to_fp16 = const()[name = tensor<string, []>("op_246_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_247_cast_fp16 = mul(x = var_245_cast_fp16, y = var_246_to_fp16)[name = tensor<string, []>("op_247_cast_fp16")];
+            tensor<int32, [4]> var_248 = const()[name = tensor<string, []>("op_248"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_249_cast_fp16 = reshape(shape = var_248, x = key_3_cast_fp16)[name = tensor<string, []>("op_249_cast_fp16")];
+            tensor<bool, []> mh_w_5_transpose_x_0 = const()[name = tensor<string, []>("mh_w_5_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_5_transpose_y_0 = const()[name = tensor<string, []>("mh_w_5_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_247_cast_fp16, y = var_249_cast_fp16)[name = tensor<string, []>("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_13_cast_fp16 = softmax(axis = var_96, x = mh_w_5_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
+            tensor<int32, [4]> var_253 = const()[name = tensor<string, []>("op_253"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_254_cast_fp16 = reshape(shape = var_253, x = value_3_cast_fp16)[name = tensor<string, []>("op_254_cast_fp16")];
+            tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_254_cast_fp16, y = obj_13_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_257 = const()[name = tensor<string, []>("op_257"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_3_cast_fp16 = reshape(shape = var_257, x = attn_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<int32, [2]> var_261 = const()[name = tensor<string, []>("op_261"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_263 = const()[name = tensor<string, []>("op_263"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_11_pad_type_0 = const()[name = tensor<string, []>("obj_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_11_pad_0 = const()[name = tensor<string, []>("obj_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88628544)))];
+            tensor<fp16, [768]> layers_0_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89808256)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_11_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_bias_to_fp16, dilations = var_263, groups = var_103, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_261, weight = layers_0_encoder_attn_o_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("obj_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, [1]> var_269 = const()[name = tensor<string, []>("op_269"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_5_cast_fp16 = reduce_mean(axes = var_269, keep_dims = var_104, x = inputs_5_cast_fp16)[name = tensor<string, []>("channels_mean_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = channels_mean_5_cast_fp16)[name = tensor<string, []>("zero_mean_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor<string, []>("zero_mean_sq_5_cast_fp16")];
+            tensor<int32, [1]> var_273 = const()[name = tensor<string, []>("op_273"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_274_cast_fp16 = reduce_mean(axes = var_273, keep_dims = var_104, x = zero_mean_sq_5_cast_fp16)[name = tensor<string, []>("op_274_cast_fp16")];
+            tensor<fp16, []> var_275_to_fp16 = const()[name = tensor<string, []>("op_275_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_276_cast_fp16 = add(x = var_274_cast_fp16, y = var_275_to_fp16)[name = tensor<string, []>("op_276_cast_fp16")];
+            tensor<fp16, []> denom_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_276_cast_fp16)[name = tensor<string, []>("denom_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
+            tensor<fp16, [768]> input_5_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89809856)))];
+            tensor<fp16, [768]> input_5_beta_0_to_fp16 = const()[name = tensor<string, []>("input_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89811456)))];
+            tensor<fp16, []> input_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<int32, [2]> var_287 = const()[name = tensor<string, []>("op_287"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_289 = const()[name = tensor<string, []>("op_289"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_7_pad_type_0 = const()[name = tensor<string, []>("input_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_7_pad_0 = const()[name = tensor<string, []>("input_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89813056)))];
+            tensor<fp16, [3072]> layers_0_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94531712)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_7_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_289, groups = var_103, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_287, weight = layers_0_fc1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_mode_0 = const()[name = tensor<string, []>("input_9_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<int32, [2]> var_295 = const()[name = tensor<string, []>("op_295"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_297 = const()[name = tensor<string, []>("op_297"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_3_pad_type_0 = const()[name = tensor<string, []>("hidden_states_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_3_pad_0 = const()[name = tensor<string, []>("hidden_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94537920)))];
+            tensor<fp16, [768]> layers_0_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99256576)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_3_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_297, groups = var_103, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = var_295, weight = layers_0_fc2_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, []> var_310 = const()[name = tensor<string, []>("op_310"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_317 = const()[name = tensor<string, []>("op_317"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_318 = const()[name = tensor<string, []>("op_318"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_330 = const()[name = tensor<string, []>("op_330"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_7_cast_fp16 = reduce_mean(axes = var_330, keep_dims = var_318, x = inputs_7_cast_fp16)[name = tensor<string, []>("channels_mean_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor<string, []>("zero_mean_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor<string, []>("zero_mean_sq_7_cast_fp16")];
+            tensor<int32, [1]> var_334 = const()[name = tensor<string, []>("op_334"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_335_cast_fp16 = reduce_mean(axes = var_334, keep_dims = var_318, x = zero_mean_sq_7_cast_fp16)[name = tensor<string, []>("op_335_cast_fp16")];
+            tensor<fp16, []> var_336_to_fp16 = const()[name = tensor<string, []>("op_336_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_337_cast_fp16 = add(x = var_335_cast_fp16, y = var_336_to_fp16)[name = tensor<string, []>("op_337_cast_fp16")];
+            tensor<fp16, []> denom_7_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_7_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_337_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
+            tensor<fp16, [768]> obj_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_15_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99258176)))];
+            tensor<fp16, [768]> obj_15_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_15_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99259776)))];
+            tensor<fp16, []> obj_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("obj_15_cast_fp16")];
+            tensor<int32, [2]> var_352 = const()[name = tensor<string, []>("op_352"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_354 = const()[name = tensor<string, []>("op_354"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99261376)))];
+            tensor<fp16, [768]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100441088)))];
+            tensor<fp16, [1, 768, 1, 1]> query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_354, groups = var_317, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_352, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
+            tensor<int32, [2]> var_358 = const()[name = tensor<string, []>("op_358"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_360 = const()[name = tensor<string, []>("op_360"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_3_pad_type_0 = const()[name = tensor<string, []>("current_key_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_3_pad_0 = const()[name = tensor<string, []>("current_key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100442688)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_3_cast_fp16 = conv(dilations = var_360, groups = var_317, pad = current_key_3_pad_0, pad_type = current_key_3_pad_type_0, strides = var_358, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_key_3_cast_fp16")];
+            tensor<int32, [2]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_367 = const()[name = tensor<string, []>("op_367"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_3_pad_type_0 = const()[name = tensor<string, []>("current_value_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_3_pad_0 = const()[name = tensor<string, []>("current_value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101622400)))];
+            tensor<fp16, [768]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102802112)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_367, groups = var_317, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = var_365, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_value_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_374_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_374_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_376_cast_fp16 = mul(x = var_63_cast_fp16_1, y = var_161_cast_fp16)[name = tensor<string, []>("op_376_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_5_cast_fp16 = add(x = var_374_cast_fp16, y = var_376_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_378_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_378_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_380_cast_fp16 = mul(x = var_78_cast_fp16_1, y = var_161_cast_fp16)[name = tensor<string, []>("op_380_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_5_cast_fp16 = add(x = var_378_cast_fp16, y = var_380_cast_fp16)[name = tensor<string, []>("value_5_cast_fp16")];
+            tensor<int32, [4]> var_383 = const()[name = tensor<string, []>("op_383"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_384_cast_fp16 = reshape(shape = var_383, x = query_5_cast_fp16)[name = tensor<string, []>("op_384_cast_fp16")];
+            tensor<fp16, []> var_385_to_fp16 = const()[name = tensor<string, []>("op_385_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_386_cast_fp16 = mul(x = var_384_cast_fp16, y = var_385_to_fp16)[name = tensor<string, []>("op_386_cast_fp16")];
+            tensor<int32, [4]> var_387 = const()[name = tensor<string, []>("op_387"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_388_cast_fp16 = reshape(shape = var_387, x = key_5_cast_fp16)[name = tensor<string, []>("op_388_cast_fp16")];
+            tensor<bool, []> mh_w_7_transpose_x_0 = const()[name = tensor<string, []>("mh_w_7_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_7_transpose_y_0 = const()[name = tensor<string, []>("mh_w_7_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_386_cast_fp16, y = var_388_cast_fp16)[name = tensor<string, []>("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_396_cast_fp16 = softmax(axis = var_310, x = mh_w_9_cast_fp16)[name = tensor<string, []>("op_396_cast_fp16")];
+            tensor<int32, [4]> var_397 = const()[name = tensor<string, []>("op_397"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_398_cast_fp16 = reshape(shape = var_397, x = value_5_cast_fp16)[name = tensor<string, []>("op_398_cast_fp16")];
+            tensor<bool, []> attn_5_transpose_x_0 = const()[name = tensor<string, []>("attn_5_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_5_transpose_y_0 = const()[name = tensor<string, []>("attn_5_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_398_cast_fp16, y = var_396_cast_fp16)[name = tensor<string, []>("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_401 = const()[name = tensor<string, []>("op_401"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_11_cast_fp16 = reshape(shape = var_401, x = attn_5_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<int32, [2]> var_405 = const()[name = tensor<string, []>("op_405"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_407 = const()[name = tensor<string, []>("op_407"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_21_pad_type_0 = const()[name = tensor<string, []>("obj_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_21_pad_0 = const()[name = tensor<string, []>("obj_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102803712)))];
+            tensor<fp16, [768]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103983424)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_21_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_407, groups = var_317, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = var_405, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, [1]> var_417 = const()[name = tensor<string, []>("op_417"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_9_cast_fp16 = reduce_mean(axes = var_417, keep_dims = var_318, x = inputs_9_cast_fp16)[name = tensor<string, []>("channels_mean_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_sq_9_cast_fp16")];
+            tensor<int32, [1]> var_421 = const()[name = tensor<string, []>("op_421"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_422_cast_fp16 = reduce_mean(axes = var_421, keep_dims = var_318, x = zero_mean_sq_9_cast_fp16)[name = tensor<string, []>("op_422_cast_fp16")];
+            tensor<fp16, []> var_423_to_fp16 = const()[name = tensor<string, []>("op_423_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_424_cast_fp16 = add(x = var_422_cast_fp16, y = var_423_to_fp16)[name = tensor<string, []>("op_424_cast_fp16")];
+            tensor<fp16, []> denom_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_424_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
+            tensor<fp16, [768]> obj_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_23_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103985024)))];
+            tensor<fp16, [768]> obj_23_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_23_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103986624)))];
+            tensor<fp16, []> obj_23_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_23_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
+            tensor<int32, [2]> var_439 = const()[name = tensor<string, []>("op_439"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_441 = const()[name = tensor<string, []>("op_441"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_7_pad_type_0 = const()[name = tensor<string, []>("query_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_7_pad_0 = const()[name = tensor<string, []>("query_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103988224)))];
+            tensor<fp16, [768]> layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105167936)))];
+            tensor<fp16, [1, 768, 1, 1]> query_7_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = var_441, groups = var_317, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_439, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
+            tensor<int32, [2]> var_445 = const()[name = tensor<string, []>("op_445"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_447 = const()[name = tensor<string, []>("op_447"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_7_pad_type_0 = const()[name = tensor<string, []>("key_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_7_pad_0 = const()[name = tensor<string, []>("key_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105169536)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_7_cast_fp16 = conv(dilations = var_447, groups = var_317, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_445, weight = layers_1_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_7_cast_fp16")];
+            tensor<int32, [2]> var_452 = const()[name = tensor<string, []>("op_452"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_454 = const()[name = tensor<string, []>("op_454"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_7_pad_type_0 = const()[name = tensor<string, []>("value_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_7_pad_0 = const()[name = tensor<string, []>("value_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106349248)))];
+            tensor<fp16, [768]> layers_1_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107528960)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_7_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_bias_to_fp16, dilations = var_454, groups = var_317, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_452, weight = layers_1_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_7_cast_fp16")];
+            tensor<int32, [4]> var_458 = const()[name = tensor<string, []>("op_458"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_459_cast_fp16 = reshape(shape = var_458, x = query_7_cast_fp16)[name = tensor<string, []>("op_459_cast_fp16")];
+            tensor<fp16, []> var_460_to_fp16 = const()[name = tensor<string, []>("op_460_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_461_cast_fp16 = mul(x = var_459_cast_fp16, y = var_460_to_fp16)[name = tensor<string, []>("op_461_cast_fp16")];
+            tensor<int32, [4]> var_462 = const()[name = tensor<string, []>("op_462"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_463_cast_fp16 = reshape(shape = var_462, x = key_7_cast_fp16)[name = tensor<string, []>("op_463_cast_fp16")];
+            tensor<bool, []> mh_w_11_transpose_x_0 = const()[name = tensor<string, []>("mh_w_11_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_11_transpose_y_0 = const()[name = tensor<string, []>("mh_w_11_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_461_cast_fp16, y = var_463_cast_fp16)[name = tensor<string, []>("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_27_cast_fp16 = softmax(axis = var_310, x = mh_w_11_cast_fp16)[name = tensor<string, []>("obj_27_cast_fp16")];
+            tensor<int32, [4]> var_467 = const()[name = tensor<string, []>("op_467"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_468_cast_fp16 = reshape(shape = var_467, x = value_7_cast_fp16)[name = tensor<string, []>("op_468_cast_fp16")];
+            tensor<bool, []> attn_7_transpose_x_0 = const()[name = tensor<string, []>("attn_7_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_7_transpose_y_0 = const()[name = tensor<string, []>("attn_7_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_468_cast_fp16, y = obj_27_cast_fp16)[name = tensor<string, []>("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_471 = const()[name = tensor<string, []>("op_471"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_13_cast_fp16 = reshape(shape = var_471, x = attn_7_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<int32, [2]> var_475 = const()[name = tensor<string, []>("op_475"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_477 = const()[name = tensor<string, []>("op_477"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_25_pad_type_0 = const()[name = tensor<string, []>("obj_25_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_25_pad_0 = const()[name = tensor<string, []>("obj_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107530560)))];
+            tensor<fp16, [768]> layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108710272)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_25_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = var_477, groups = var_317, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = var_475, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("obj_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> var_483 = const()[name = tensor<string, []>("op_483"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_11_cast_fp16 = reduce_mean(axes = var_483, keep_dims = var_318, x = inputs_11_cast_fp16)[name = tensor<string, []>("channels_mean_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_sq_11_cast_fp16")];
+            tensor<int32, [1]> var_487 = const()[name = tensor<string, []>("op_487"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_488_cast_fp16 = reduce_mean(axes = var_487, keep_dims = var_318, x = zero_mean_sq_11_cast_fp16)[name = tensor<string, []>("op_488_cast_fp16")];
+            tensor<fp16, []> var_489_to_fp16 = const()[name = tensor<string, []>("op_489_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_490_cast_fp16 = add(x = var_488_cast_fp16, y = var_489_to_fp16)[name = tensor<string, []>("op_490_cast_fp16")];
+            tensor<fp16, []> denom_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_490_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
+            tensor<fp16, [768]> input_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_15_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108711872)))];
+            tensor<fp16, [768]> input_15_beta_0_to_fp16 = const()[name = tensor<string, []>("input_15_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108713472)))];
+            tensor<fp16, []> input_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<int32, [2]> var_501 = const()[name = tensor<string, []>("op_501"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_503 = const()[name = tensor<string, []>("op_503"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108715072)))];
+            tensor<fp16, [3072]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113433728)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_503, groups = var_317, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_501, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_mode_0 = const()[name = tensor<string, []>("input_19_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<int32, [2]> var_509 = const()[name = tensor<string, []>("op_509"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_511 = const()[name = tensor<string, []>("op_511"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113439936)))];
+            tensor<fp16, [768]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118158592)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_511, groups = var_317, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_509, weight = layers_1_fc2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_524 = const()[name = tensor<string, []>("op_524"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_531 = const()[name = tensor<string, []>("op_531"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_532 = const()[name = tensor<string, []>("op_532"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_544 = const()[name = tensor<string, []>("op_544"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_13_cast_fp16 = reduce_mean(axes = var_544, keep_dims = var_532, x = inputs_13_cast_fp16)[name = tensor<string, []>("channels_mean_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor<string, []>("zero_mean_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor<string, []>("zero_mean_sq_13_cast_fp16")];
+            tensor<int32, [1]> var_548 = const()[name = tensor<string, []>("op_548"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_549_cast_fp16 = reduce_mean(axes = var_548, keep_dims = var_532, x = zero_mean_sq_13_cast_fp16)[name = tensor<string, []>("op_549_cast_fp16")];
+            tensor<fp16, []> var_550_to_fp16 = const()[name = tensor<string, []>("op_550_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_551_cast_fp16 = add(x = var_549_cast_fp16, y = var_550_to_fp16)[name = tensor<string, []>("op_551_cast_fp16")];
+            tensor<fp16, []> denom_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_551_cast_fp16)[name = tensor<string, []>("denom_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor<string, []>("out_13_cast_fp16")];
+            tensor<fp16, [768]> obj_29_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_29_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118160192)))];
+            tensor<fp16, [768]> obj_29_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_29_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118161792)))];
+            tensor<fp16, []> obj_29_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_29_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor<string, []>("obj_29_cast_fp16")];
+            tensor<int32, [2]> var_566 = const()[name = tensor<string, []>("op_566"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_568 = const()[name = tensor<string, []>("op_568"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_9_pad_type_0 = const()[name = tensor<string, []>("query_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_9_pad_0 = const()[name = tensor<string, []>("query_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118163392)))];
+            tensor<fp16, [768]> layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119343104)))];
+            tensor<fp16, [1, 768, 1, 1]> query_9_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_568, groups = var_531, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_566, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
+            tensor<int32, [2]> var_572 = const()[name = tensor<string, []>("op_572"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_574 = const()[name = tensor<string, []>("op_574"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_5_pad_type_0 = const()[name = tensor<string, []>("current_key_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_5_pad_0 = const()[name = tensor<string, []>("current_key_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119344704)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_5_cast_fp16 = conv(dilations = var_574, groups = var_531, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = var_572, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("current_key_5_cast_fp16")];
+            tensor<int32, [2]> var_579 = const()[name = tensor<string, []>("op_579"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_581 = const()[name = tensor<string, []>("op_581"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_5_pad_type_0 = const()[name = tensor<string, []>("current_value_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_5_pad_0 = const()[name = tensor<string, []>("current_value_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120524416)))];
+            tensor<fp16, [768]> layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(121704128)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_581, groups = var_531, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = var_579, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("current_value_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_588_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_588_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_590_cast_fp16 = mul(x = var_63_cast_fp16_2, y = var_161_cast_fp16)[name = tensor<string, []>("op_590_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_9_cast_fp16 = add(x = var_588_cast_fp16, y = var_590_cast_fp16)[name = tensor<string, []>("key_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_592_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_592_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_594_cast_fp16 = mul(x = var_78_cast_fp16_2, y = var_161_cast_fp16)[name = tensor<string, []>("op_594_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_9_cast_fp16 = add(x = var_592_cast_fp16, y = var_594_cast_fp16)[name = tensor<string, []>("value_9_cast_fp16")];
+            tensor<int32, [4]> var_597 = const()[name = tensor<string, []>("op_597"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_598_cast_fp16 = reshape(shape = var_597, x = query_9_cast_fp16)[name = tensor<string, []>("op_598_cast_fp16")];
+            tensor<fp16, []> var_599_to_fp16 = const()[name = tensor<string, []>("op_599_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor<string, []>("op_600_cast_fp16")];
+            tensor<int32, [4]> var_601 = const()[name = tensor<string, []>("op_601"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_602_cast_fp16 = reshape(shape = var_601, x = key_9_cast_fp16)[name = tensor<string, []>("op_602_cast_fp16")];
+            tensor<bool, []> mh_w_13_transpose_x_0 = const()[name = tensor<string, []>("mh_w_13_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_13_transpose_y_0 = const()[name = tensor<string, []>("mh_w_13_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_600_cast_fp16, y = var_602_cast_fp16)[name = tensor<string, []>("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_610_cast_fp16 = softmax(axis = var_524, x = mh_w_15_cast_fp16)[name = tensor<string, []>("op_610_cast_fp16")];
+            tensor<int32, [4]> var_611 = const()[name = tensor<string, []>("op_611"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_612_cast_fp16 = reshape(shape = var_611, x = value_9_cast_fp16)[name = tensor<string, []>("op_612_cast_fp16")];
+            tensor<bool, []> attn_9_transpose_x_0 = const()[name = tensor<string, []>("attn_9_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_9_transpose_y_0 = const()[name = tensor<string, []>("attn_9_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_612_cast_fp16, y = var_610_cast_fp16)[name = tensor<string, []>("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_615 = const()[name = tensor<string, []>("op_615"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_21_cast_fp16 = reshape(shape = var_615, x = attn_9_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<int32, [2]> var_619 = const()[name = tensor<string, []>("op_619"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_621 = const()[name = tensor<string, []>("op_621"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_35_pad_type_0 = const()[name = tensor<string, []>("obj_35_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_35_pad_0 = const()[name = tensor<string, []>("obj_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(121705728)))];
+            tensor<fp16, [768]> layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122885440)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_35_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_621, groups = var_531, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = var_619, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("obj_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> var_631 = const()[name = tensor<string, []>("op_631"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_15_cast_fp16 = reduce_mean(axes = var_631, keep_dims = var_532, x = inputs_15_cast_fp16)[name = tensor<string, []>("channels_mean_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor<string, []>("zero_mean_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor<string, []>("zero_mean_sq_15_cast_fp16")];
+            tensor<int32, [1]> var_635 = const()[name = tensor<string, []>("op_635"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_636_cast_fp16 = reduce_mean(axes = var_635, keep_dims = var_532, x = zero_mean_sq_15_cast_fp16)[name = tensor<string, []>("op_636_cast_fp16")];
+            tensor<fp16, []> var_637_to_fp16 = const()[name = tensor<string, []>("op_637_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_638_cast_fp16 = add(x = var_636_cast_fp16, y = var_637_to_fp16)[name = tensor<string, []>("op_638_cast_fp16")];
+            tensor<fp16, []> denom_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_638_cast_fp16)[name = tensor<string, []>("denom_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = denom_15_cast_fp16)[name = tensor<string, []>("out_15_cast_fp16")];
+            tensor<fp16, [768]> obj_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_37_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122887040)))];
+            tensor<fp16, [768]> obj_37_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_37_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122888640)))];
+            tensor<fp16, []> obj_37_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_37_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor<string, []>("obj_37_cast_fp16")];
+            tensor<int32, [2]> var_653 = const()[name = tensor<string, []>("op_653"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_655 = const()[name = tensor<string, []>("op_655"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_11_pad_type_0 = const()[name = tensor<string, []>("query_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_11_pad_0 = const()[name = tensor<string, []>("query_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122890240)))];
+            tensor<fp16, [768]> layers_2_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124069952)))];
+            tensor<fp16, [1, 768, 1, 1]> query_11_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_bias_to_fp16, dilations = var_655, groups = var_531, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = var_653, weight = layers_2_encoder_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor<string, []>("query_11_cast_fp16")];
+            tensor<int32, [2]> var_659 = const()[name = tensor<string, []>("op_659"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_661 = const()[name = tensor<string, []>("op_661"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_11_pad_type_0 = const()[name = tensor<string, []>("key_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_11_pad_0 = const()[name = tensor<string, []>("key_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124071552)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_11_cast_fp16 = conv(dilations = var_661, groups = var_531, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = var_659, weight = layers_2_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_11_cast_fp16")];
+            tensor<int32, [2]> var_666 = const()[name = tensor<string, []>("op_666"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_668 = const()[name = tensor<string, []>("op_668"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_11_pad_type_0 = const()[name = tensor<string, []>("value_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_11_pad_0 = const()[name = tensor<string, []>("value_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(125251264)))];
+            tensor<fp16, [768]> layers_2_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126430976)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_11_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_bias_to_fp16, dilations = var_668, groups = var_531, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = var_666, weight = layers_2_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_11_cast_fp16")];
+            tensor<int32, [4]> var_672 = const()[name = tensor<string, []>("op_672"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_673_cast_fp16 = reshape(shape = var_672, x = query_11_cast_fp16)[name = tensor<string, []>("op_673_cast_fp16")];
+            tensor<fp16, []> var_674_to_fp16 = const()[name = tensor<string, []>("op_674_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_675_cast_fp16 = mul(x = var_673_cast_fp16, y = var_674_to_fp16)[name = tensor<string, []>("op_675_cast_fp16")];
+            tensor<int32, [4]> var_676 = const()[name = tensor<string, []>("op_676"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_677_cast_fp16 = reshape(shape = var_676, x = key_11_cast_fp16)[name = tensor<string, []>("op_677_cast_fp16")];
+            tensor<bool, []> mh_w_17_transpose_x_0 = const()[name = tensor<string, []>("mh_w_17_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_17_transpose_y_0 = const()[name = tensor<string, []>("mh_w_17_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_675_cast_fp16, y = var_677_cast_fp16)[name = tensor<string, []>("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_41_cast_fp16 = softmax(axis = var_524, x = mh_w_17_cast_fp16)[name = tensor<string, []>("obj_41_cast_fp16")];
+            tensor<int32, [4]> var_681 = const()[name = tensor<string, []>("op_681"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_682_cast_fp16 = reshape(shape = var_681, x = value_11_cast_fp16)[name = tensor<string, []>("op_682_cast_fp16")];
+            tensor<bool, []> attn_11_transpose_x_0 = const()[name = tensor<string, []>("attn_11_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_11_transpose_y_0 = const()[name = tensor<string, []>("attn_11_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_682_cast_fp16, y = obj_41_cast_fp16)[name = tensor<string, []>("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_685 = const()[name = tensor<string, []>("op_685"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_23_cast_fp16 = reshape(shape = var_685, x = attn_11_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<int32, [2]> var_689 = const()[name = tensor<string, []>("op_689"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_691 = const()[name = tensor<string, []>("op_691"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_39_pad_type_0 = const()[name = tensor<string, []>("obj_39_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_39_pad_0 = const()[name = tensor<string, []>("obj_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126432576)))];
+            tensor<fp16, [768]> layers_2_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127612288)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_39_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_bias_to_fp16, dilations = var_691, groups = var_531, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = var_689, weight = layers_2_encoder_attn_o_proj_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("obj_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, [1]> var_697 = const()[name = tensor<string, []>("op_697"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_17_cast_fp16 = reduce_mean(axes = var_697, keep_dims = var_532, x = inputs_17_cast_fp16)[name = tensor<string, []>("channels_mean_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_17_cast_fp16 = sub(x = inputs_17_cast_fp16, y = channels_mean_17_cast_fp16)[name = tensor<string, []>("zero_mean_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = zero_mean_17_cast_fp16)[name = tensor<string, []>("zero_mean_sq_17_cast_fp16")];
+            tensor<int32, [1]> var_701 = const()[name = tensor<string, []>("op_701"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_702_cast_fp16 = reduce_mean(axes = var_701, keep_dims = var_532, x = zero_mean_sq_17_cast_fp16)[name = tensor<string, []>("op_702_cast_fp16")];
+            tensor<fp16, []> var_703_to_fp16 = const()[name = tensor<string, []>("op_703_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_704_cast_fp16 = add(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor<string, []>("op_704_cast_fp16")];
+            tensor<fp16, []> denom_17_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_17_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0_to_fp16, x = var_704_cast_fp16)[name = tensor<string, []>("denom_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = denom_17_cast_fp16)[name = tensor<string, []>("out_17_cast_fp16")];
+            tensor<fp16, [768]> input_25_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127613888)))];
+            tensor<fp16, [768]> input_25_beta_0_to_fp16 = const()[name = tensor<string, []>("input_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127615488)))];
+            tensor<fp16, []> input_25_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_25_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<int32, [2]> var_715 = const()[name = tensor<string, []>("op_715"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_717 = const()[name = tensor<string, []>("op_717"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_27_pad_type_0 = const()[name = tensor<string, []>("input_27_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_27_pad_0 = const()[name = tensor<string, []>("input_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127617088)))];
+            tensor<fp16, [3072]> layers_2_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132335744)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_27_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_717, groups = var_531, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = var_715, weight = layers_2_fc1_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_mode_0 = const()[name = tensor<string, []>("input_29_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<int32, [2]> var_723 = const()[name = tensor<string, []>("op_723"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_725 = const()[name = tensor<string, []>("op_725"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_7_pad_type_0 = const()[name = tensor<string, []>("hidden_states_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = tensor<string, []>("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132341952)))];
+            tensor<fp16, [768]> layers_2_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137060608)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_7_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_725, groups = var_531, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_723, weight = layers_2_fc2_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, []> var_738 = const()[name = tensor<string, []>("op_738"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_745 = const()[name = tensor<string, []>("op_745"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_746 = const()[name = tensor<string, []>("op_746"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_758 = const()[name = tensor<string, []>("op_758"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_19_cast_fp16 = reduce_mean(axes = var_758, keep_dims = var_746, x = inputs_19_cast_fp16)[name = tensor<string, []>("channels_mean_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_19_cast_fp16 = sub(x = inputs_19_cast_fp16, y = channels_mean_19_cast_fp16)[name = tensor<string, []>("zero_mean_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = zero_mean_19_cast_fp16)[name = tensor<string, []>("zero_mean_sq_19_cast_fp16")];
+            tensor<int32, [1]> var_762 = const()[name = tensor<string, []>("op_762"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_763_cast_fp16 = reduce_mean(axes = var_762, keep_dims = var_746, x = zero_mean_sq_19_cast_fp16)[name = tensor<string, []>("op_763_cast_fp16")];
+            tensor<fp16, []> var_764_to_fp16 = const()[name = tensor<string, []>("op_764_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_765_cast_fp16 = add(x = var_763_cast_fp16, y = var_764_to_fp16)[name = tensor<string, []>("op_765_cast_fp16")];
+            tensor<fp16, []> denom_19_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_19_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0_to_fp16, x = var_765_cast_fp16)[name = tensor<string, []>("denom_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = denom_19_cast_fp16)[name = tensor<string, []>("out_19_cast_fp16")];
+            tensor<fp16, [768]> obj_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_43_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137062208)))];
+            tensor<fp16, [768]> obj_43_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_43_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137063808)))];
+            tensor<fp16, []> obj_43_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_43_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor<string, []>("obj_43_cast_fp16")];
+            tensor<int32, [2]> var_780 = const()[name = tensor<string, []>("op_780"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_782 = const()[name = tensor<string, []>("op_782"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_13_pad_type_0 = const()[name = tensor<string, []>("query_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_13_pad_0 = const()[name = tensor<string, []>("query_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137065408)))];
+            tensor<fp16, [768]> layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138245120)))];
+            tensor<fp16, [1, 768, 1, 1]> query_13_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_782, groups = var_745, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = var_780, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("query_13_cast_fp16")];
+            tensor<int32, [2]> var_786 = const()[name = tensor<string, []>("op_786"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_788 = const()[name = tensor<string, []>("op_788"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_7_pad_type_0 = const()[name = tensor<string, []>("current_key_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_7_pad_0 = const()[name = tensor<string, []>("current_key_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138246720)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_7_cast_fp16 = conv(dilations = var_788, groups = var_745, pad = current_key_7_pad_0, pad_type = current_key_7_pad_type_0, strides = var_786, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("current_key_7_cast_fp16")];
+            tensor<int32, [2]> var_793 = const()[name = tensor<string, []>("op_793"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_795 = const()[name = tensor<string, []>("op_795"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_7_pad_type_0 = const()[name = tensor<string, []>("current_value_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_7_pad_0 = const()[name = tensor<string, []>("current_value_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(139426432)))];
+            tensor<fp16, [768]> layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(140606144)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_795, groups = var_745, pad = current_value_7_pad_0, pad_type = current_value_7_pad_type_0, strides = var_793, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("current_value_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_802_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_802_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_804_cast_fp16 = mul(x = var_63_cast_fp16_3, y = var_161_cast_fp16)[name = tensor<string, []>("op_804_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_13_cast_fp16 = add(x = var_802_cast_fp16, y = var_804_cast_fp16)[name = tensor<string, []>("key_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_806_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_806_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_808_cast_fp16 = mul(x = var_78_cast_fp16_3, y = var_161_cast_fp16)[name = tensor<string, []>("op_808_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_13_cast_fp16 = add(x = var_806_cast_fp16, y = var_808_cast_fp16)[name = tensor<string, []>("value_13_cast_fp16")];
+            tensor<int32, [4]> var_811 = const()[name = tensor<string, []>("op_811"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_812_cast_fp16 = reshape(shape = var_811, x = query_13_cast_fp16)[name = tensor<string, []>("op_812_cast_fp16")];
+            tensor<fp16, []> var_813_to_fp16 = const()[name = tensor<string, []>("op_813_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_814_cast_fp16 = mul(x = var_812_cast_fp16, y = var_813_to_fp16)[name = tensor<string, []>("op_814_cast_fp16")];
+            tensor<int32, [4]> var_815 = const()[name = tensor<string, []>("op_815"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_816_cast_fp16 = reshape(shape = var_815, x = key_13_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
+            tensor<bool, []> mh_w_19_transpose_x_0 = const()[name = tensor<string, []>("mh_w_19_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_19_transpose_y_0 = const()[name = tensor<string, []>("mh_w_19_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_814_cast_fp16, y = var_816_cast_fp16)[name = tensor<string, []>("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_824_cast_fp16 = softmax(axis = var_738, x = mh_w_21_cast_fp16)[name = tensor<string, []>("op_824_cast_fp16")];
+            tensor<int32, [4]> var_825 = const()[name = tensor<string, []>("op_825"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_826_cast_fp16 = reshape(shape = var_825, x = value_13_cast_fp16)[name = tensor<string, []>("op_826_cast_fp16")];
+            tensor<bool, []> attn_13_transpose_x_0 = const()[name = tensor<string, []>("attn_13_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_13_transpose_y_0 = const()[name = tensor<string, []>("attn_13_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_826_cast_fp16, y = var_824_cast_fp16)[name = tensor<string, []>("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_829 = const()[name = tensor<string, []>("op_829"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_31_cast_fp16 = reshape(shape = var_829, x = attn_13_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<int32, [2]> var_833 = const()[name = tensor<string, []>("op_833"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_835 = const()[name = tensor<string, []>("op_835"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_49_pad_type_0 = const()[name = tensor<string, []>("obj_49_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_49_pad_0 = const()[name = tensor<string, []>("obj_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(140607744)))];
+            tensor<fp16, [768]> layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(141787456)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_49_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_835, groups = var_745, pad = obj_49_pad_0, pad_type = obj_49_pad_type_0, strides = var_833, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("obj_49_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, [1]> var_845 = const()[name = tensor<string, []>("op_845"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_21_cast_fp16 = reduce_mean(axes = var_845, keep_dims = var_746, x = inputs_21_cast_fp16)[name = tensor<string, []>("channels_mean_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_21_cast_fp16 = sub(x = inputs_21_cast_fp16, y = channels_mean_21_cast_fp16)[name = tensor<string, []>("zero_mean_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = zero_mean_21_cast_fp16)[name = tensor<string, []>("zero_mean_sq_21_cast_fp16")];
+            tensor<int32, [1]> var_849 = const()[name = tensor<string, []>("op_849"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_850_cast_fp16 = reduce_mean(axes = var_849, keep_dims = var_746, x = zero_mean_sq_21_cast_fp16)[name = tensor<string, []>("op_850_cast_fp16")];
+            tensor<fp16, []> var_851_to_fp16 = const()[name = tensor<string, []>("op_851_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_852_cast_fp16 = add(x = var_850_cast_fp16, y = var_851_to_fp16)[name = tensor<string, []>("op_852_cast_fp16")];
+            tensor<fp16, []> denom_21_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_21_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0_to_fp16, x = var_852_cast_fp16)[name = tensor<string, []>("denom_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = denom_21_cast_fp16)[name = tensor<string, []>("out_21_cast_fp16")];
+            tensor<fp16, [768]> obj_51_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_51_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(141789056)))];
+            tensor<fp16, [768]> obj_51_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_51_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(141790656)))];
+            tensor<fp16, []> obj_51_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_51_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor<string, []>("obj_51_cast_fp16")];
+            tensor<int32, [2]> var_867 = const()[name = tensor<string, []>("op_867"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_869 = const()[name = tensor<string, []>("op_869"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_15_pad_type_0 = const()[name = tensor<string, []>("query_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_15_pad_0 = const()[name = tensor<string, []>("query_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(141792256)))];
+            tensor<fp16, [768]> layers_3_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142971968)))];
+            tensor<fp16, [1, 768, 1, 1]> query_15_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_bias_to_fp16, dilations = var_869, groups = var_745, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = var_867, weight = layers_3_encoder_attn_q_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor<string, []>("query_15_cast_fp16")];
+            tensor<int32, [2]> var_873 = const()[name = tensor<string, []>("op_873"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_875 = const()[name = tensor<string, []>("op_875"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_15_pad_type_0 = const()[name = tensor<string, []>("key_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_15_pad_0 = const()[name = tensor<string, []>("key_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142973568)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_15_cast_fp16 = conv(dilations = var_875, groups = var_745, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = var_873, weight = layers_3_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_15_cast_fp16")];
+            tensor<int32, [2]> var_880 = const()[name = tensor<string, []>("op_880"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_882 = const()[name = tensor<string, []>("op_882"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_15_pad_type_0 = const()[name = tensor<string, []>("value_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_15_pad_0 = const()[name = tensor<string, []>("value_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144153280)))];
+            tensor<fp16, [768]> layers_3_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145332992)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_15_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_bias_to_fp16, dilations = var_882, groups = var_745, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = var_880, weight = layers_3_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_15_cast_fp16")];
+            tensor<int32, [4]> var_886 = const()[name = tensor<string, []>("op_886"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_887_cast_fp16 = reshape(shape = var_886, x = query_15_cast_fp16)[name = tensor<string, []>("op_887_cast_fp16")];
+            tensor<fp16, []> var_888_to_fp16 = const()[name = tensor<string, []>("op_888_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_889_cast_fp16 = mul(x = var_887_cast_fp16, y = var_888_to_fp16)[name = tensor<string, []>("op_889_cast_fp16")];
+            tensor<int32, [4]> var_890 = const()[name = tensor<string, []>("op_890"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_891_cast_fp16 = reshape(shape = var_890, x = key_15_cast_fp16)[name = tensor<string, []>("op_891_cast_fp16")];
+            tensor<bool, []> mh_w_23_transpose_x_0 = const()[name = tensor<string, []>("mh_w_23_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_23_transpose_y_0 = const()[name = tensor<string, []>("mh_w_23_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_889_cast_fp16, y = var_891_cast_fp16)[name = tensor<string, []>("mh_w_23_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_55_cast_fp16 = softmax(axis = var_738, x = mh_w_23_cast_fp16)[name = tensor<string, []>("obj_55_cast_fp16")];
+            tensor<int32, [4]> var_895 = const()[name = tensor<string, []>("op_895"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_896_cast_fp16 = reshape(shape = var_895, x = value_15_cast_fp16)[name = tensor<string, []>("op_896_cast_fp16")];
+            tensor<bool, []> attn_15_transpose_x_0 = const()[name = tensor<string, []>("attn_15_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_15_transpose_y_0 = const()[name = tensor<string, []>("attn_15_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_896_cast_fp16, y = obj_55_cast_fp16)[name = tensor<string, []>("attn_15_cast_fp16")];
+            tensor<int32, [4]> var_899 = const()[name = tensor<string, []>("op_899"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_33_cast_fp16 = reshape(shape = var_899, x = attn_15_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<int32, [2]> var_903 = const()[name = tensor<string, []>("op_903"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_905 = const()[name = tensor<string, []>("op_905"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_53_pad_type_0 = const()[name = tensor<string, []>("obj_53_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_53_pad_0 = const()[name = tensor<string, []>("obj_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145334592)))];
+            tensor<fp16, [768]> layers_3_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(146514304)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_53_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_bias_to_fp16, dilations = var_905, groups = var_745, pad = obj_53_pad_0, pad_type = obj_53_pad_type_0, strides = var_903, weight = layers_3_encoder_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("obj_53_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> var_911 = const()[name = tensor<string, []>("op_911"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_23_cast_fp16 = reduce_mean(axes = var_911, keep_dims = var_746, x = inputs_23_cast_fp16)[name = tensor<string, []>("channels_mean_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_23_cast_fp16 = sub(x = inputs_23_cast_fp16, y = channels_mean_23_cast_fp16)[name = tensor<string, []>("zero_mean_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = zero_mean_23_cast_fp16)[name = tensor<string, []>("zero_mean_sq_23_cast_fp16")];
+            tensor<int32, [1]> var_915 = const()[name = tensor<string, []>("op_915"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_916_cast_fp16 = reduce_mean(axes = var_915, keep_dims = var_746, x = zero_mean_sq_23_cast_fp16)[name = tensor<string, []>("op_916_cast_fp16")];
+            tensor<fp16, []> var_917_to_fp16 = const()[name = tensor<string, []>("op_917_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_918_cast_fp16 = add(x = var_916_cast_fp16, y = var_917_to_fp16)[name = tensor<string, []>("op_918_cast_fp16")];
+            tensor<fp16, []> denom_23_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_23_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0_to_fp16, x = var_918_cast_fp16)[name = tensor<string, []>("denom_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = denom_23_cast_fp16)[name = tensor<string, []>("out_23_cast_fp16")];
+            tensor<fp16, [768]> input_35_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_35_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(146515904)))];
+            tensor<fp16, [768]> input_35_beta_0_to_fp16 = const()[name = tensor<string, []>("input_35_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(146517504)))];
+            tensor<fp16, []> input_35_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_35_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<int32, [2]> var_929 = const()[name = tensor<string, []>("op_929"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_931 = const()[name = tensor<string, []>("op_931"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_37_pad_type_0 = const()[name = tensor<string, []>("input_37_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_37_pad_0 = const()[name = tensor<string, []>("input_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(146519104)))];
+            tensor<fp16, [3072]> layers_3_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151237760)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_37_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_931, groups = var_745, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = var_929, weight = layers_3_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_mode_0 = const()[name = tensor<string, []>("input_39_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<int32, [2]> var_937 = const()[name = tensor<string, []>("op_937"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_939 = const()[name = tensor<string, []>("op_939"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_9_pad_type_0 = const()[name = tensor<string, []>("hidden_states_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_9_pad_0 = const()[name = tensor<string, []>("hidden_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151243968)))];
+            tensor<fp16, [768]> layers_3_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(155962624)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_9_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_939, groups = var_745, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_937, weight = layers_3_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor<string, []>("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_952 = const()[name = tensor<string, []>("op_952"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_959 = const()[name = tensor<string, []>("op_959"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_960 = const()[name = tensor<string, []>("op_960"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_972 = const()[name = tensor<string, []>("op_972"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_25_cast_fp16 = reduce_mean(axes = var_972, keep_dims = var_960, x = inputs_25_cast_fp16)[name = tensor<string, []>("channels_mean_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_25_cast_fp16 = sub(x = inputs_25_cast_fp16, y = channels_mean_25_cast_fp16)[name = tensor<string, []>("zero_mean_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = zero_mean_25_cast_fp16)[name = tensor<string, []>("zero_mean_sq_25_cast_fp16")];
+            tensor<int32, [1]> var_976 = const()[name = tensor<string, []>("op_976"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_977_cast_fp16 = reduce_mean(axes = var_976, keep_dims = var_960, x = zero_mean_sq_25_cast_fp16)[name = tensor<string, []>("op_977_cast_fp16")];
+            tensor<fp16, []> var_978_to_fp16 = const()[name = tensor<string, []>("op_978_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_979_cast_fp16 = add(x = var_977_cast_fp16, y = var_978_to_fp16)[name = tensor<string, []>("op_979_cast_fp16")];
+            tensor<fp16, []> denom_25_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_25_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0_to_fp16, x = var_979_cast_fp16)[name = tensor<string, []>("denom_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = denom_25_cast_fp16)[name = tensor<string, []>("out_25_cast_fp16")];
+            tensor<fp16, [768]> obj_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_57_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(155964224)))];
+            tensor<fp16, [768]> obj_57_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_57_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(155965824)))];
+            tensor<fp16, []> obj_57_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_57_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor<string, []>("obj_57_cast_fp16")];
+            tensor<int32, [2]> var_994 = const()[name = tensor<string, []>("op_994"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_996 = const()[name = tensor<string, []>("op_996"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_17_pad_type_0 = const()[name = tensor<string, []>("query_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_17_pad_0 = const()[name = tensor<string, []>("query_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(155967424)))];
+            tensor<fp16, [768]> layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157147136)))];
+            tensor<fp16, [1, 768, 1, 1]> query_17_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = var_996, groups = var_959, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = var_994, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor<string, []>("query_17_cast_fp16")];
+            tensor<int32, [2]> var_1000 = const()[name = tensor<string, []>("op_1000"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1002 = const()[name = tensor<string, []>("op_1002"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_9_pad_type_0 = const()[name = tensor<string, []>("current_key_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_9_pad_0 = const()[name = tensor<string, []>("current_key_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157148736)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_9_cast_fp16 = conv(dilations = var_1002, groups = var_959, pad = current_key_9_pad_0, pad_type = current_key_9_pad_type_0, strides = var_1000, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor<string, []>("current_key_9_cast_fp16")];
+            tensor<int32, [2]> var_1007 = const()[name = tensor<string, []>("op_1007"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1009 = const()[name = tensor<string, []>("op_1009"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_9_pad_type_0 = const()[name = tensor<string, []>("current_value_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_9_pad_0 = const()[name = tensor<string, []>("current_value_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158328448)))];
+            tensor<fp16, [768]> layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(159508160)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = var_1009, groups = var_959, pad = current_value_9_pad_0, pad_type = current_value_9_pad_type_0, strides = var_1007, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor<string, []>("current_value_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1016_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1016_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1018_cast_fp16 = mul(x = var_63_cast_fp16_4, y = var_161_cast_fp16)[name = tensor<string, []>("op_1018_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_17_cast_fp16 = add(x = var_1016_cast_fp16, y = var_1018_cast_fp16)[name = tensor<string, []>("key_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1020_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1020_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1022_cast_fp16 = mul(x = var_78_cast_fp16_4, y = var_161_cast_fp16)[name = tensor<string, []>("op_1022_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_17_cast_fp16 = add(x = var_1020_cast_fp16, y = var_1022_cast_fp16)[name = tensor<string, []>("value_17_cast_fp16")];
+            tensor<int32, [4]> var_1025 = const()[name = tensor<string, []>("op_1025"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1026_cast_fp16 = reshape(shape = var_1025, x = query_17_cast_fp16)[name = tensor<string, []>("op_1026_cast_fp16")];
+            tensor<fp16, []> var_1027_to_fp16 = const()[name = tensor<string, []>("op_1027_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1028_cast_fp16 = mul(x = var_1026_cast_fp16, y = var_1027_to_fp16)[name = tensor<string, []>("op_1028_cast_fp16")];
+            tensor<int32, [4]> var_1029 = const()[name = tensor<string, []>("op_1029"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1030_cast_fp16 = reshape(shape = var_1029, x = key_17_cast_fp16)[name = tensor<string, []>("op_1030_cast_fp16")];
+            tensor<bool, []> mh_w_25_transpose_x_0 = const()[name = tensor<string, []>("mh_w_25_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_25_transpose_y_0 = const()[name = tensor<string, []>("mh_w_25_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1028_cast_fp16, y = var_1030_cast_fp16)[name = tensor<string, []>("mh_w_25_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_27_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_1038_cast_fp16 = softmax(axis = var_952, x = mh_w_27_cast_fp16)[name = tensor<string, []>("op_1038_cast_fp16")];
+            tensor<int32, [4]> var_1039 = const()[name = tensor<string, []>("op_1039"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1040_cast_fp16 = reshape(shape = var_1039, x = value_17_cast_fp16)[name = tensor<string, []>("op_1040_cast_fp16")];
+            tensor<bool, []> attn_17_transpose_x_0 = const()[name = tensor<string, []>("attn_17_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_17_transpose_y_0 = const()[name = tensor<string, []>("attn_17_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1040_cast_fp16, y = var_1038_cast_fp16)[name = tensor<string, []>("attn_17_cast_fp16")];
+            tensor<int32, [4]> var_1043 = const()[name = tensor<string, []>("op_1043"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_41_cast_fp16 = reshape(shape = var_1043, x = attn_17_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<int32, [2]> var_1047 = const()[name = tensor<string, []>("op_1047"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1049 = const()[name = tensor<string, []>("op_1049"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_63_pad_type_0 = const()[name = tensor<string, []>("obj_63_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_63_pad_0 = const()[name = tensor<string, []>("obj_63_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(159509760)))];
+            tensor<fp16, [768]> layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160689472)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_63_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = var_1049, groups = var_959, pad = obj_63_pad_0, pad_type = obj_63_pad_type_0, strides = var_1047, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("obj_63_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_63_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> var_1059 = const()[name = tensor<string, []>("op_1059"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_27_cast_fp16 = reduce_mean(axes = var_1059, keep_dims = var_960, x = inputs_27_cast_fp16)[name = tensor<string, []>("channels_mean_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_27_cast_fp16 = sub(x = inputs_27_cast_fp16, y = channels_mean_27_cast_fp16)[name = tensor<string, []>("zero_mean_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = zero_mean_27_cast_fp16)[name = tensor<string, []>("zero_mean_sq_27_cast_fp16")];
+            tensor<int32, [1]> var_1063 = const()[name = tensor<string, []>("op_1063"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1064_cast_fp16 = reduce_mean(axes = var_1063, keep_dims = var_960, x = zero_mean_sq_27_cast_fp16)[name = tensor<string, []>("op_1064_cast_fp16")];
+            tensor<fp16, []> var_1065_to_fp16 = const()[name = tensor<string, []>("op_1065_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1066_cast_fp16 = add(x = var_1064_cast_fp16, y = var_1065_to_fp16)[name = tensor<string, []>("op_1066_cast_fp16")];
+            tensor<fp16, []> denom_27_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_27_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0_to_fp16, x = var_1066_cast_fp16)[name = tensor<string, []>("denom_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = denom_27_cast_fp16)[name = tensor<string, []>("out_27_cast_fp16")];
+            tensor<fp16, [768]> obj_65_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_65_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160691072)))];
+            tensor<fp16, [768]> obj_65_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_65_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160692672)))];
+            tensor<fp16, []> obj_65_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_65_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor<string, []>("obj_65_cast_fp16")];
+            tensor<int32, [2]> var_1081 = const()[name = tensor<string, []>("op_1081"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1083 = const()[name = tensor<string, []>("op_1083"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_19_pad_type_0 = const()[name = tensor<string, []>("query_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_19_pad_0 = const()[name = tensor<string, []>("query_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160694272)))];
+            tensor<fp16, [768]> layers_4_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161873984)))];
+            tensor<fp16, [1, 768, 1, 1]> query_19_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_bias_to_fp16, dilations = var_1083, groups = var_959, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = var_1081, weight = layers_4_encoder_attn_q_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor<string, []>("query_19_cast_fp16")];
+            tensor<int32, [2]> var_1087 = const()[name = tensor<string, []>("op_1087"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1089 = const()[name = tensor<string, []>("op_1089"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_19_pad_type_0 = const()[name = tensor<string, []>("key_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_19_pad_0 = const()[name = tensor<string, []>("key_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161875584)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_19_cast_fp16 = conv(dilations = var_1089, groups = var_959, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = var_1087, weight = layers_4_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_19_cast_fp16")];
+            tensor<int32, [2]> var_1094 = const()[name = tensor<string, []>("op_1094"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1096 = const()[name = tensor<string, []>("op_1096"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_19_pad_type_0 = const()[name = tensor<string, []>("value_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_19_pad_0 = const()[name = tensor<string, []>("value_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163055296)))];
+            tensor<fp16, [768]> layers_4_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(164235008)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_19_cast_fp16 = conv(bias = layers_4_encoder_attn_v_proj_bias_to_fp16, dilations = var_1096, groups = var_959, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = var_1094, weight = layers_4_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_19_cast_fp16")];
+            tensor<int32, [4]> var_1100 = const()[name = tensor<string, []>("op_1100"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1101_cast_fp16 = reshape(shape = var_1100, x = query_19_cast_fp16)[name = tensor<string, []>("op_1101_cast_fp16")];
+            tensor<fp16, []> var_1102_to_fp16 = const()[name = tensor<string, []>("op_1102_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1103_cast_fp16 = mul(x = var_1101_cast_fp16, y = var_1102_to_fp16)[name = tensor<string, []>("op_1103_cast_fp16")];
+            tensor<int32, [4]> var_1104 = const()[name = tensor<string, []>("op_1104"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1105_cast_fp16 = reshape(shape = var_1104, x = key_19_cast_fp16)[name = tensor<string, []>("op_1105_cast_fp16")];
+            tensor<bool, []> mh_w_29_transpose_x_0 = const()[name = tensor<string, []>("mh_w_29_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_29_transpose_y_0 = const()[name = tensor<string, []>("mh_w_29_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1103_cast_fp16, y = var_1105_cast_fp16)[name = tensor<string, []>("mh_w_29_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_69_cast_fp16 = softmax(axis = var_952, x = mh_w_29_cast_fp16)[name = tensor<string, []>("obj_69_cast_fp16")];
+            tensor<int32, [4]> var_1109 = const()[name = tensor<string, []>("op_1109"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1110_cast_fp16 = reshape(shape = var_1109, x = value_19_cast_fp16)[name = tensor<string, []>("op_1110_cast_fp16")];
+            tensor<bool, []> attn_19_transpose_x_0 = const()[name = tensor<string, []>("attn_19_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_19_transpose_y_0 = const()[name = tensor<string, []>("attn_19_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1110_cast_fp16, y = obj_69_cast_fp16)[name = tensor<string, []>("attn_19_cast_fp16")];
+            tensor<int32, [4]> var_1113 = const()[name = tensor<string, []>("op_1113"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_43_cast_fp16 = reshape(shape = var_1113, x = attn_19_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<int32, [2]> var_1117 = const()[name = tensor<string, []>("op_1117"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1119 = const()[name = tensor<string, []>("op_1119"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_67_pad_type_0 = const()[name = tensor<string, []>("obj_67_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_67_pad_0 = const()[name = tensor<string, []>("obj_67_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(164236608)))];
+            tensor<fp16, [768]> layers_4_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165416320)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_67_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_bias_to_fp16, dilations = var_1119, groups = var_959, pad = obj_67_pad_0, pad_type = obj_67_pad_type_0, strides = var_1117, weight = layers_4_encoder_attn_o_proj_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("obj_67_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_67_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, [1]> var_1125 = const()[name = tensor<string, []>("op_1125"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_29_cast_fp16 = reduce_mean(axes = var_1125, keep_dims = var_960, x = inputs_29_cast_fp16)[name = tensor<string, []>("channels_mean_29_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_29_cast_fp16 = sub(x = inputs_29_cast_fp16, y = channels_mean_29_cast_fp16)[name = tensor<string, []>("zero_mean_29_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = zero_mean_29_cast_fp16)[name = tensor<string, []>("zero_mean_sq_29_cast_fp16")];
+            tensor<int32, [1]> var_1129 = const()[name = tensor<string, []>("op_1129"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1130_cast_fp16 = reduce_mean(axes = var_1129, keep_dims = var_960, x = zero_mean_sq_29_cast_fp16)[name = tensor<string, []>("op_1130_cast_fp16")];
+            tensor<fp16, []> var_1131_to_fp16 = const()[name = tensor<string, []>("op_1131_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1132_cast_fp16 = add(x = var_1130_cast_fp16, y = var_1131_to_fp16)[name = tensor<string, []>("op_1132_cast_fp16")];
+            tensor<fp16, []> denom_29_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_29_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0_to_fp16, x = var_1132_cast_fp16)[name = tensor<string, []>("denom_29_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = denom_29_cast_fp16)[name = tensor<string, []>("out_29_cast_fp16")];
+            tensor<fp16, [768]> input_45_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_45_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165417920)))];
+            tensor<fp16, [768]> input_45_beta_0_to_fp16 = const()[name = tensor<string, []>("input_45_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165419520)))];
+            tensor<fp16, []> input_45_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_45_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<int32, [2]> var_1143 = const()[name = tensor<string, []>("op_1143"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1145 = const()[name = tensor<string, []>("op_1145"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_47_pad_type_0 = const()[name = tensor<string, []>("input_47_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_47_pad_0 = const()[name = tensor<string, []>("input_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165421120)))];
+            tensor<fp16, [3072]> layers_4_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(170139776)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_47_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = var_1145, groups = var_959, pad = input_47_pad_0, pad_type = input_47_pad_type_0, strides = var_1143, weight = layers_4_fc1_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_mode_0 = const()[name = tensor<string, []>("input_49_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<int32, [2]> var_1151 = const()[name = tensor<string, []>("op_1151"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1153 = const()[name = tensor<string, []>("op_1153"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_11_pad_type_0 = const()[name = tensor<string, []>("hidden_states_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_11_pad_0 = const()[name = tensor<string, []>("hidden_states_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(170145984)))];
+            tensor<fp16, [768]> layers_4_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_4_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174864640)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_11_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = var_1153, groups = var_959, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = var_1151, weight = layers_4_fc2_weight_to_fp16, x = input_49_cast_fp16)[name = tensor<string, []>("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, []> var_1166 = const()[name = tensor<string, []>("op_1166"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1173 = const()[name = tensor<string, []>("op_1173"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1174 = const()[name = tensor<string, []>("op_1174"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_1186 = const()[name = tensor<string, []>("op_1186"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_31_cast_fp16 = reduce_mean(axes = var_1186, keep_dims = var_1174, x = inputs_31_cast_fp16)[name = tensor<string, []>("channels_mean_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_31_cast_fp16 = sub(x = inputs_31_cast_fp16, y = channels_mean_31_cast_fp16)[name = tensor<string, []>("zero_mean_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = zero_mean_31_cast_fp16)[name = tensor<string, []>("zero_mean_sq_31_cast_fp16")];
+            tensor<int32, [1]> var_1190 = const()[name = tensor<string, []>("op_1190"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1191_cast_fp16 = reduce_mean(axes = var_1190, keep_dims = var_1174, x = zero_mean_sq_31_cast_fp16)[name = tensor<string, []>("op_1191_cast_fp16")];
+            tensor<fp16, []> var_1192_to_fp16 = const()[name = tensor<string, []>("op_1192_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1193_cast_fp16 = add(x = var_1191_cast_fp16, y = var_1192_to_fp16)[name = tensor<string, []>("op_1193_cast_fp16")];
+            tensor<fp16, []> denom_31_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_31_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0_to_fp16, x = var_1193_cast_fp16)[name = tensor<string, []>("denom_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = denom_31_cast_fp16)[name = tensor<string, []>("out_31_cast_fp16")];
+            tensor<fp16, [768]> obj_71_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_71_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174866240)))];
+            tensor<fp16, [768]> obj_71_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_71_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174867840)))];
+            tensor<fp16, []> obj_71_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_71_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_71_cast_fp16 = batch_norm(beta = obj_71_beta_0_to_fp16, epsilon = obj_71_epsilon_0_to_fp16, gamma = obj_71_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor<string, []>("obj_71_cast_fp16")];
+            tensor<int32, [2]> var_1208 = const()[name = tensor<string, []>("op_1208"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1210 = const()[name = tensor<string, []>("op_1210"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_21_pad_type_0 = const()[name = tensor<string, []>("query_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_21_pad_0 = const()[name = tensor<string, []>("query_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174869440)))];
+            tensor<fp16, [768]> layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176049152)))];
+            tensor<fp16, [1, 768, 1, 1]> query_21_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = var_1210, groups = var_1173, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = var_1208, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor<string, []>("query_21_cast_fp16")];
+            tensor<int32, [2]> var_1214 = const()[name = tensor<string, []>("op_1214"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1216 = const()[name = tensor<string, []>("op_1216"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_11_pad_type_0 = const()[name = tensor<string, []>("current_key_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_11_pad_0 = const()[name = tensor<string, []>("current_key_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176050752)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_11_cast_fp16 = conv(dilations = var_1216, groups = var_1173, pad = current_key_11_pad_0, pad_type = current_key_11_pad_type_0, strides = var_1214, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor<string, []>("current_key_11_cast_fp16")];
+            tensor<int32, [2]> var_1221 = const()[name = tensor<string, []>("op_1221"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1223 = const()[name = tensor<string, []>("op_1223"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_11_pad_type_0 = const()[name = tensor<string, []>("current_value_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_11_pad_0 = const()[name = tensor<string, []>("current_value_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177230464)))];
+            tensor<fp16, [768]> layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(178410176)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_11_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = var_1223, groups = var_1173, pad = current_value_11_pad_0, pad_type = current_value_11_pad_type_0, strides = var_1221, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor<string, []>("current_value_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1230_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1230_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1232_cast_fp16 = mul(x = var_63_cast_fp16_5, y = var_161_cast_fp16)[name = tensor<string, []>("op_1232_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_21_cast_fp16 = add(x = var_1230_cast_fp16, y = var_1232_cast_fp16)[name = tensor<string, []>("key_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1234_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1234_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1236_cast_fp16 = mul(x = var_78_cast_fp16_5, y = var_161_cast_fp16)[name = tensor<string, []>("op_1236_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_21_cast_fp16 = add(x = var_1234_cast_fp16, y = var_1236_cast_fp16)[name = tensor<string, []>("value_21_cast_fp16")];
+            tensor<int32, [4]> var_1239 = const()[name = tensor<string, []>("op_1239"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1240_cast_fp16 = reshape(shape = var_1239, x = query_21_cast_fp16)[name = tensor<string, []>("op_1240_cast_fp16")];
+            tensor<fp16, []> var_1241_to_fp16 = const()[name = tensor<string, []>("op_1241_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor<string, []>("op_1242_cast_fp16")];
+            tensor<int32, [4]> var_1243 = const()[name = tensor<string, []>("op_1243"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1244_cast_fp16 = reshape(shape = var_1243, x = key_21_cast_fp16)[name = tensor<string, []>("op_1244_cast_fp16")];
+            tensor<bool, []> mh_w_31_transpose_x_0 = const()[name = tensor<string, []>("mh_w_31_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_31_transpose_y_0 = const()[name = tensor<string, []>("mh_w_31_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_1242_cast_fp16, y = var_1244_cast_fp16)[name = tensor<string, []>("mh_w_31_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_33_cast_fp16 = add(x = mh_w_31_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_33_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_1252_cast_fp16 = softmax(axis = var_1166, x = mh_w_33_cast_fp16)[name = tensor<string, []>("op_1252_cast_fp16")];
+            tensor<int32, [4]> var_1253 = const()[name = tensor<string, []>("op_1253"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1254_cast_fp16 = reshape(shape = var_1253, x = value_21_cast_fp16)[name = tensor<string, []>("op_1254_cast_fp16")];
+            tensor<bool, []> attn_21_transpose_x_0 = const()[name = tensor<string, []>("attn_21_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_21_transpose_y_0 = const()[name = tensor<string, []>("attn_21_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1254_cast_fp16, y = var_1252_cast_fp16)[name = tensor<string, []>("attn_21_cast_fp16")];
+            tensor<int32, [4]> var_1257 = const()[name = tensor<string, []>("op_1257"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_51_cast_fp16 = reshape(shape = var_1257, x = attn_21_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<int32, [2]> var_1261 = const()[name = tensor<string, []>("op_1261"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1263 = const()[name = tensor<string, []>("op_1263"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_77_pad_type_0 = const()[name = tensor<string, []>("obj_77_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_77_pad_0 = const()[name = tensor<string, []>("obj_77_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(178411776)))];
+            tensor<fp16, [768]> layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(179591488)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_77_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = var_1263, groups = var_1173, pad = obj_77_pad_0, pad_type = obj_77_pad_type_0, strides = var_1261, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("obj_77_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_77_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, [1]> var_1273 = const()[name = tensor<string, []>("op_1273"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_33_cast_fp16 = reduce_mean(axes = var_1273, keep_dims = var_1174, x = inputs_33_cast_fp16)[name = tensor<string, []>("channels_mean_33_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_33_cast_fp16 = sub(x = inputs_33_cast_fp16, y = channels_mean_33_cast_fp16)[name = tensor<string, []>("zero_mean_33_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = zero_mean_33_cast_fp16)[name = tensor<string, []>("zero_mean_sq_33_cast_fp16")];
+            tensor<int32, [1]> var_1277 = const()[name = tensor<string, []>("op_1277"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1278_cast_fp16 = reduce_mean(axes = var_1277, keep_dims = var_1174, x = zero_mean_sq_33_cast_fp16)[name = tensor<string, []>("op_1278_cast_fp16")];
+            tensor<fp16, []> var_1279_to_fp16 = const()[name = tensor<string, []>("op_1279_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1280_cast_fp16 = add(x = var_1278_cast_fp16, y = var_1279_to_fp16)[name = tensor<string, []>("op_1280_cast_fp16")];
+            tensor<fp16, []> denom_33_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_33_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0_to_fp16, x = var_1280_cast_fp16)[name = tensor<string, []>("denom_33_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = denom_33_cast_fp16)[name = tensor<string, []>("out_33_cast_fp16")];
+            tensor<fp16, [768]> obj_79_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_79_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(179593088)))];
+            tensor<fp16, [768]> obj_79_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_79_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(179594688)))];
+            tensor<fp16, []> obj_79_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_79_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor<string, []>("obj_79_cast_fp16")];
+            tensor<int32, [2]> var_1295 = const()[name = tensor<string, []>("op_1295"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1297 = const()[name = tensor<string, []>("op_1297"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_23_pad_type_0 = const()[name = tensor<string, []>("query_23_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_23_pad_0 = const()[name = tensor<string, []>("query_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(179596288)))];
+            tensor<fp16, [768]> layers_5_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(180776000)))];
+            tensor<fp16, [1, 768, 1, 1]> query_23_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_bias_to_fp16, dilations = var_1297, groups = var_1173, pad = query_23_pad_0, pad_type = query_23_pad_type_0, strides = var_1295, weight = layers_5_encoder_attn_q_proj_weight_to_fp16, x = obj_79_cast_fp16)[name = tensor<string, []>("query_23_cast_fp16")];
+            tensor<int32, [2]> var_1301 = const()[name = tensor<string, []>("op_1301"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1303 = const()[name = tensor<string, []>("op_1303"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_23_pad_type_0 = const()[name = tensor<string, []>("key_23_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_23_pad_0 = const()[name = tensor<string, []>("key_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(180777600)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_23_cast_fp16 = conv(dilations = var_1303, groups = var_1173, pad = key_23_pad_0, pad_type = key_23_pad_type_0, strides = var_1301, weight = layers_5_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_23_cast_fp16")];
+            tensor<int32, [2]> var_1308 = const()[name = tensor<string, []>("op_1308"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1310 = const()[name = tensor<string, []>("op_1310"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_23_pad_type_0 = const()[name = tensor<string, []>("value_23_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_23_pad_0 = const()[name = tensor<string, []>("value_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181957312)))];
+            tensor<fp16, [768]> layers_5_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183137024)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_23_cast_fp16 = conv(bias = layers_5_encoder_attn_v_proj_bias_to_fp16, dilations = var_1310, groups = var_1173, pad = value_23_pad_0, pad_type = value_23_pad_type_0, strides = var_1308, weight = layers_5_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_23_cast_fp16")];
+            tensor<int32, [4]> var_1314 = const()[name = tensor<string, []>("op_1314"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1315_cast_fp16 = reshape(shape = var_1314, x = query_23_cast_fp16)[name = tensor<string, []>("op_1315_cast_fp16")];
+            tensor<fp16, []> var_1316_to_fp16 = const()[name = tensor<string, []>("op_1316_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1317_cast_fp16 = mul(x = var_1315_cast_fp16, y = var_1316_to_fp16)[name = tensor<string, []>("op_1317_cast_fp16")];
+            tensor<int32, [4]> var_1318 = const()[name = tensor<string, []>("op_1318"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1319_cast_fp16 = reshape(shape = var_1318, x = key_23_cast_fp16)[name = tensor<string, []>("op_1319_cast_fp16")];
+            tensor<bool, []> mh_w_35_transpose_x_0 = const()[name = tensor<string, []>("mh_w_35_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_35_transpose_y_0 = const()[name = tensor<string, []>("mh_w_35_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_35_cast_fp16 = matmul(transpose_x = mh_w_35_transpose_x_0, transpose_y = mh_w_35_transpose_y_0, x = var_1317_cast_fp16, y = var_1319_cast_fp16)[name = tensor<string, []>("mh_w_35_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_83_cast_fp16 = softmax(axis = var_1166, x = mh_w_35_cast_fp16)[name = tensor<string, []>("obj_83_cast_fp16")];
+            tensor<int32, [4]> var_1323 = const()[name = tensor<string, []>("op_1323"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1324_cast_fp16 = reshape(shape = var_1323, x = value_23_cast_fp16)[name = tensor<string, []>("op_1324_cast_fp16")];
+            tensor<bool, []> attn_23_transpose_x_0 = const()[name = tensor<string, []>("attn_23_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_23_transpose_y_0 = const()[name = tensor<string, []>("attn_23_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1324_cast_fp16, y = obj_83_cast_fp16)[name = tensor<string, []>("attn_23_cast_fp16")];
+            tensor<int32, [4]> var_1327 = const()[name = tensor<string, []>("op_1327"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_53_cast_fp16 = reshape(shape = var_1327, x = attn_23_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<int32, [2]> var_1331 = const()[name = tensor<string, []>("op_1331"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1333 = const()[name = tensor<string, []>("op_1333"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_81_pad_type_0 = const()[name = tensor<string, []>("obj_81_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_81_pad_0 = const()[name = tensor<string, []>("obj_81_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183138624)))];
+            tensor<fp16, [768]> layers_5_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184318336)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_81_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_bias_to_fp16, dilations = var_1333, groups = var_1173, pad = obj_81_pad_0, pad_type = obj_81_pad_type_0, strides = var_1331, weight = layers_5_encoder_attn_o_proj_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("obj_81_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_81_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> var_1342 = const()[name = tensor<string, []>("op_1342"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_35_cast_fp16 = reduce_mean(axes = var_1342, keep_dims = var_1174, x = inputs_35_cast_fp16)[name = tensor<string, []>("channels_mean_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_35_cast_fp16 = sub(x = inputs_35_cast_fp16, y = channels_mean_35_cast_fp16)[name = tensor<string, []>("zero_mean_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = zero_mean_35_cast_fp16)[name = tensor<string, []>("zero_mean_sq_35_cast_fp16")];
+            tensor<int32, [1]> var_1346 = const()[name = tensor<string, []>("op_1346"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1347_cast_fp16 = reduce_mean(axes = var_1346, keep_dims = var_1174, x = zero_mean_sq_35_cast_fp16)[name = tensor<string, []>("op_1347_cast_fp16")];
+            tensor<fp16, []> var_1348_to_fp16 = const()[name = tensor<string, []>("op_1348_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1349_cast_fp16 = add(x = var_1347_cast_fp16, y = var_1348_to_fp16)[name = tensor<string, []>("op_1349_cast_fp16")];
+            tensor<fp16, []> denom_35_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_35_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0_to_fp16, x = var_1349_cast_fp16)[name = tensor<string, []>("denom_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = denom_35_cast_fp16)[name = tensor<string, []>("out_35_cast_fp16")];
+            tensor<fp16, [768]> input_55_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_55_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184319936)))];
+            tensor<fp16, [768]> input_55_beta_0_to_fp16 = const()[name = tensor<string, []>("input_55_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184321536)))];
+            tensor<fp16, []> input_55_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_55_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<int32, [2]> var_1360 = const()[name = tensor<string, []>("op_1360"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1362 = const()[name = tensor<string, []>("op_1362"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_57_pad_type_0 = const()[name = tensor<string, []>("input_57_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_57_pad_0 = const()[name = tensor<string, []>("input_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184323136)))];
+            tensor<fp16, [3072]> layers_5_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189041792)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_57_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = var_1362, groups = var_1173, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = var_1360, weight = layers_5_fc1_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_mode_0 = const()[name = tensor<string, []>("input_59_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<int32, [2]> var_1368 = const()[name = tensor<string, []>("op_1368"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1370 = const()[name = tensor<string, []>("op_1370"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_13_pad_type_0 = const()[name = tensor<string, []>("hidden_states_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_13_pad_0 = const()[name = tensor<string, []>("hidden_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189048000)))];
+            tensor<fp16, [768]> layers_5_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_5_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193766656)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_13_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = var_1370, groups = var_1173, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = var_1368, weight = layers_5_fc2_weight_to_fp16, x = input_59_cast_fp16)[name = tensor<string, []>("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_1384 = const()[name = tensor<string, []>("op_1384"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1391 = const()[name = tensor<string, []>("op_1391"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1392 = const()[name = tensor<string, []>("op_1392"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_1404 = const()[name = tensor<string, []>("op_1404"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_37_cast_fp16 = reduce_mean(axes = var_1404, keep_dims = var_1392, x = inputs_37_cast_fp16)[name = tensor<string, []>("channels_mean_37_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_37_cast_fp16 = sub(x = inputs_37_cast_fp16, y = channels_mean_37_cast_fp16)[name = tensor<string, []>("zero_mean_37_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = zero_mean_37_cast_fp16)[name = tensor<string, []>("zero_mean_sq_37_cast_fp16")];
+            tensor<int32, [1]> var_1408 = const()[name = tensor<string, []>("op_1408"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1409_cast_fp16 = reduce_mean(axes = var_1408, keep_dims = var_1392, x = zero_mean_sq_37_cast_fp16)[name = tensor<string, []>("op_1409_cast_fp16")];
+            tensor<fp16, []> var_1410_to_fp16 = const()[name = tensor<string, []>("op_1410_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1411_cast_fp16 = add(x = var_1409_cast_fp16, y = var_1410_to_fp16)[name = tensor<string, []>("op_1411_cast_fp16")];
+            tensor<fp16, []> denom_37_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_37_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0_to_fp16, x = var_1411_cast_fp16)[name = tensor<string, []>("denom_37_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = denom_37_cast_fp16)[name = tensor<string, []>("out_37_cast_fp16")];
+            tensor<fp16, [768]> obj_85_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_85_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193768256)))];
+            tensor<fp16, [768]> obj_85_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_85_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193769856)))];
+            tensor<fp16, []> obj_85_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_85_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor<string, []>("obj_85_cast_fp16")];
+            tensor<int32, [2]> var_1426 = const()[name = tensor<string, []>("op_1426"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1428 = const()[name = tensor<string, []>("op_1428"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_25_pad_type_0 = const()[name = tensor<string, []>("query_25_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_25_pad_0 = const()[name = tensor<string, []>("query_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193771456)))];
+            tensor<fp16, [768]> layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194951168)))];
+            tensor<fp16, [1, 768, 1, 1]> query_25_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_bias_to_fp16, dilations = var_1428, groups = var_1391, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = var_1426, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor<string, []>("query_25_cast_fp16")];
+            tensor<int32, [2]> var_1432 = const()[name = tensor<string, []>("op_1432"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1434 = const()[name = tensor<string, []>("op_1434"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_13_pad_type_0 = const()[name = tensor<string, []>("current_key_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_13_pad_0 = const()[name = tensor<string, []>("current_key_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194952768)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_13_cast_fp16 = conv(dilations = var_1434, groups = var_1391, pad = current_key_13_pad_0, pad_type = current_key_13_pad_type_0, strides = var_1432, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor<string, []>("current_key_13_cast_fp16")];
+            tensor<int32, [2]> var_1439 = const()[name = tensor<string, []>("op_1439"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1441 = const()[name = tensor<string, []>("op_1441"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_13_pad_type_0 = const()[name = tensor<string, []>("current_value_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_13_pad_0 = const()[name = tensor<string, []>("current_value_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196132480)))];
+            tensor<fp16, [768]> layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197312192)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_13_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_bias_to_fp16, dilations = var_1441, groups = var_1391, pad = current_value_13_pad_0, pad_type = current_value_13_pad_type_0, strides = var_1439, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor<string, []>("current_value_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1448_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1448_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1450_cast_fp16 = mul(x = var_63_cast_fp16_6, y = var_161_cast_fp16)[name = tensor<string, []>("op_1450_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_25_cast_fp16 = add(x = var_1448_cast_fp16, y = var_1450_cast_fp16)[name = tensor<string, []>("key_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1452_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1452_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1454_cast_fp16 = mul(x = var_78_cast_fp16_6, y = var_161_cast_fp16)[name = tensor<string, []>("op_1454_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_25_cast_fp16 = add(x = var_1452_cast_fp16, y = var_1454_cast_fp16)[name = tensor<string, []>("value_25_cast_fp16")];
+            tensor<int32, [4]> var_1457 = const()[name = tensor<string, []>("op_1457"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1458_cast_fp16 = reshape(shape = var_1457, x = query_25_cast_fp16)[name = tensor<string, []>("op_1458_cast_fp16")];
+            tensor<fp16, []> var_1459_to_fp16 = const()[name = tensor<string, []>("op_1459_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1460_cast_fp16 = mul(x = var_1458_cast_fp16, y = var_1459_to_fp16)[name = tensor<string, []>("op_1460_cast_fp16")];
+            tensor<int32, [4]> var_1461 = const()[name = tensor<string, []>("op_1461"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1462_cast_fp16 = reshape(shape = var_1461, x = key_25_cast_fp16)[name = tensor<string, []>("op_1462_cast_fp16")];
+            tensor<bool, []> mh_w_37_transpose_x_0 = const()[name = tensor<string, []>("mh_w_37_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_37_transpose_y_0 = const()[name = tensor<string, []>("mh_w_37_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_1460_cast_fp16, y = var_1462_cast_fp16)[name = tensor<string, []>("mh_w_37_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_39_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_1470_cast_fp16 = softmax(axis = var_1384, x = mh_w_39_cast_fp16)[name = tensor<string, []>("op_1470_cast_fp16")];
+            tensor<int32, [4]> var_1471 = const()[name = tensor<string, []>("op_1471"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1472_cast_fp16 = reshape(shape = var_1471, x = value_25_cast_fp16)[name = tensor<string, []>("op_1472_cast_fp16")];
+            tensor<bool, []> attn_25_transpose_x_0 = const()[name = tensor<string, []>("attn_25_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_25_transpose_y_0 = const()[name = tensor<string, []>("attn_25_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1472_cast_fp16, y = var_1470_cast_fp16)[name = tensor<string, []>("attn_25_cast_fp16")];
+            tensor<int32, [4]> var_1475 = const()[name = tensor<string, []>("op_1475"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_61_cast_fp16 = reshape(shape = var_1475, x = attn_25_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<int32, [2]> var_1479 = const()[name = tensor<string, []>("op_1479"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1481 = const()[name = tensor<string, []>("op_1481"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_91_pad_type_0 = const()[name = tensor<string, []>("obj_91_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_91_pad_0 = const()[name = tensor<string, []>("obj_91_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197313792)))];
+            tensor<fp16, [768]> layers_6_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198493504)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_91_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_bias_to_fp16, dilations = var_1481, groups = var_1391, pad = obj_91_pad_0, pad_type = obj_91_pad_type_0, strides = var_1479, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("obj_91_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_91_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> var_1491 = const()[name = tensor<string, []>("op_1491"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_39_cast_fp16 = reduce_mean(axes = var_1491, keep_dims = var_1392, x = inputs_39_cast_fp16)[name = tensor<string, []>("channels_mean_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_39_cast_fp16 = sub(x = inputs_39_cast_fp16, y = channels_mean_39_cast_fp16)[name = tensor<string, []>("zero_mean_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = zero_mean_39_cast_fp16)[name = tensor<string, []>("zero_mean_sq_39_cast_fp16")];
+            tensor<int32, [1]> var_1495 = const()[name = tensor<string, []>("op_1495"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1496_cast_fp16 = reduce_mean(axes = var_1495, keep_dims = var_1392, x = zero_mean_sq_39_cast_fp16)[name = tensor<string, []>("op_1496_cast_fp16")];
+            tensor<fp16, []> var_1497_to_fp16 = const()[name = tensor<string, []>("op_1497_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1498_cast_fp16 = add(x = var_1496_cast_fp16, y = var_1497_to_fp16)[name = tensor<string, []>("op_1498_cast_fp16")];
+            tensor<fp16, []> denom_39_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_39_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0_to_fp16, x = var_1498_cast_fp16)[name = tensor<string, []>("denom_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = denom_39_cast_fp16)[name = tensor<string, []>("out_39_cast_fp16")];
+            tensor<fp16, [768]> obj_93_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_93_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198495104)))];
+            tensor<fp16, [768]> obj_93_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_93_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198496704)))];
+            tensor<fp16, []> obj_93_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_93_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor<string, []>("obj_93_cast_fp16")];
+            tensor<int32, [2]> var_1513 = const()[name = tensor<string, []>("op_1513"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1515 = const()[name = tensor<string, []>("op_1515"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_27_pad_type_0 = const()[name = tensor<string, []>("query_27_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_27_pad_0 = const()[name = tensor<string, []>("query_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198498304)))];
+            tensor<fp16, [768]> layers_6_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199678016)))];
+            tensor<fp16, [1, 768, 1, 1]> query_27_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_bias_to_fp16, dilations = var_1515, groups = var_1391, pad = query_27_pad_0, pad_type = query_27_pad_type_0, strides = var_1513, weight = layers_6_encoder_attn_q_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor<string, []>("query_27_cast_fp16")];
+            tensor<int32, [2]> var_1519 = const()[name = tensor<string, []>("op_1519"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1521 = const()[name = tensor<string, []>("op_1521"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_27_pad_type_0 = const()[name = tensor<string, []>("key_27_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_27_pad_0 = const()[name = tensor<string, []>("key_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199679616)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_27_cast_fp16 = conv(dilations = var_1521, groups = var_1391, pad = key_27_pad_0, pad_type = key_27_pad_type_0, strides = var_1519, weight = layers_6_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_27_cast_fp16")];
+            tensor<int32, [2]> var_1526 = const()[name = tensor<string, []>("op_1526"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1528 = const()[name = tensor<string, []>("op_1528"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_27_pad_type_0 = const()[name = tensor<string, []>("value_27_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_27_pad_0 = const()[name = tensor<string, []>("value_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200859328)))];
+            tensor<fp16, [768]> layers_6_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202039040)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_27_cast_fp16 = conv(bias = layers_6_encoder_attn_v_proj_bias_to_fp16, dilations = var_1528, groups = var_1391, pad = value_27_pad_0, pad_type = value_27_pad_type_0, strides = var_1526, weight = layers_6_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_27_cast_fp16")];
+            tensor<int32, [4]> var_1532 = const()[name = tensor<string, []>("op_1532"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1533_cast_fp16 = reshape(shape = var_1532, x = query_27_cast_fp16)[name = tensor<string, []>("op_1533_cast_fp16")];
+            tensor<fp16, []> var_1534_to_fp16 = const()[name = tensor<string, []>("op_1534_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1535_cast_fp16 = mul(x = var_1533_cast_fp16, y = var_1534_to_fp16)[name = tensor<string, []>("op_1535_cast_fp16")];
+            tensor<int32, [4]> var_1536 = const()[name = tensor<string, []>("op_1536"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1537_cast_fp16 = reshape(shape = var_1536, x = key_27_cast_fp16)[name = tensor<string, []>("op_1537_cast_fp16")];
+            tensor<bool, []> mh_w_41_transpose_x_0 = const()[name = tensor<string, []>("mh_w_41_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_41_transpose_y_0 = const()[name = tensor<string, []>("mh_w_41_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_1535_cast_fp16, y = var_1537_cast_fp16)[name = tensor<string, []>("mh_w_41_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_97_cast_fp16 = softmax(axis = var_1384, x = mh_w_41_cast_fp16)[name = tensor<string, []>("obj_97_cast_fp16")];
+            tensor<int32, [4]> var_1541 = const()[name = tensor<string, []>("op_1541"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1542_cast_fp16 = reshape(shape = var_1541, x = value_27_cast_fp16)[name = tensor<string, []>("op_1542_cast_fp16")];
+            tensor<bool, []> attn_27_transpose_x_0 = const()[name = tensor<string, []>("attn_27_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_27_transpose_y_0 = const()[name = tensor<string, []>("attn_27_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_1542_cast_fp16, y = obj_97_cast_fp16)[name = tensor<string, []>("attn_27_cast_fp16")];
+            tensor<int32, [4]> var_1545 = const()[name = tensor<string, []>("op_1545"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_63_cast_fp16 = reshape(shape = var_1545, x = attn_27_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<int32, [2]> var_1549 = const()[name = tensor<string, []>("op_1549"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1551 = const()[name = tensor<string, []>("op_1551"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_95_pad_type_0 = const()[name = tensor<string, []>("obj_95_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_95_pad_0 = const()[name = tensor<string, []>("obj_95_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202040640)))];
+            tensor<fp16, [768]> layers_6_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(203220352)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_95_cast_fp16 = conv(bias = layers_6_encoder_attn_o_proj_bias_to_fp16, dilations = var_1551, groups = var_1391, pad = obj_95_pad_0, pad_type = obj_95_pad_type_0, strides = var_1549, weight = layers_6_encoder_attn_o_proj_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("obj_95_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_95_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, [1]> var_1557 = const()[name = tensor<string, []>("op_1557"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_41_cast_fp16 = reduce_mean(axes = var_1557, keep_dims = var_1392, x = inputs_41_cast_fp16)[name = tensor<string, []>("channels_mean_41_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_41_cast_fp16 = sub(x = inputs_41_cast_fp16, y = channels_mean_41_cast_fp16)[name = tensor<string, []>("zero_mean_41_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = zero_mean_41_cast_fp16)[name = tensor<string, []>("zero_mean_sq_41_cast_fp16")];
+            tensor<int32, [1]> var_1561 = const()[name = tensor<string, []>("op_1561"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1562_cast_fp16 = reduce_mean(axes = var_1561, keep_dims = var_1392, x = zero_mean_sq_41_cast_fp16)[name = tensor<string, []>("op_1562_cast_fp16")];
+            tensor<fp16, []> var_1563_to_fp16 = const()[name = tensor<string, []>("op_1563_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1564_cast_fp16 = add(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor<string, []>("op_1564_cast_fp16")];
+            tensor<fp16, []> denom_41_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_41_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0_to_fp16, x = var_1564_cast_fp16)[name = tensor<string, []>("denom_41_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = denom_41_cast_fp16)[name = tensor<string, []>("out_41_cast_fp16")];
+            tensor<fp16, [768]> input_65_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_65_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(203221952)))];
+            tensor<fp16, [768]> input_65_beta_0_to_fp16 = const()[name = tensor<string, []>("input_65_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(203223552)))];
+            tensor<fp16, []> input_65_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_65_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<int32, [2]> var_1575 = const()[name = tensor<string, []>("op_1575"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1577 = const()[name = tensor<string, []>("op_1577"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_67_pad_type_0 = const()[name = tensor<string, []>("input_67_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_67_pad_0 = const()[name = tensor<string, []>("input_67_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(203225152)))];
+            tensor<fp16, [3072]> layers_6_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207943808)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_67_cast_fp16 = conv(bias = layers_6_fc1_bias_to_fp16, dilations = var_1577, groups = var_1391, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = var_1575, weight = layers_6_fc1_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<string, []> input_69_mode_0 = const()[name = tensor<string, []>("input_69_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<int32, [2]> var_1583 = const()[name = tensor<string, []>("op_1583"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1585 = const()[name = tensor<string, []>("op_1585"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_15_pad_type_0 = const()[name = tensor<string, []>("hidden_states_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_15_pad_0 = const()[name = tensor<string, []>("hidden_states_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207950016)))];
+            tensor<fp16, [768]> layers_6_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_6_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212668672)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_15_cast_fp16 = conv(bias = layers_6_fc2_bias_to_fp16, dilations = var_1585, groups = var_1391, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = var_1583, weight = layers_6_fc2_weight_to_fp16, x = input_69_cast_fp16)[name = tensor<string, []>("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, []> var_1598 = const()[name = tensor<string, []>("op_1598"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1605 = const()[name = tensor<string, []>("op_1605"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1606 = const()[name = tensor<string, []>("op_1606"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_1618 = const()[name = tensor<string, []>("op_1618"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_43_cast_fp16 = reduce_mean(axes = var_1618, keep_dims = var_1606, x = inputs_43_cast_fp16)[name = tensor<string, []>("channels_mean_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_43_cast_fp16 = sub(x = inputs_43_cast_fp16, y = channels_mean_43_cast_fp16)[name = tensor<string, []>("zero_mean_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = zero_mean_43_cast_fp16)[name = tensor<string, []>("zero_mean_sq_43_cast_fp16")];
+            tensor<int32, [1]> var_1622 = const()[name = tensor<string, []>("op_1622"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1623_cast_fp16 = reduce_mean(axes = var_1622, keep_dims = var_1606, x = zero_mean_sq_43_cast_fp16)[name = tensor<string, []>("op_1623_cast_fp16")];
+            tensor<fp16, []> var_1624_to_fp16 = const()[name = tensor<string, []>("op_1624_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1625_cast_fp16 = add(x = var_1623_cast_fp16, y = var_1624_to_fp16)[name = tensor<string, []>("op_1625_cast_fp16")];
+            tensor<fp16, []> denom_43_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_43_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0_to_fp16, x = var_1625_cast_fp16)[name = tensor<string, []>("denom_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = denom_43_cast_fp16)[name = tensor<string, []>("out_43_cast_fp16")];
+            tensor<fp16, [768]> obj_99_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_99_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212670272)))];
+            tensor<fp16, [768]> obj_99_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_99_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212671872)))];
+            tensor<fp16, []> obj_99_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_99_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_99_cast_fp16 = batch_norm(beta = obj_99_beta_0_to_fp16, epsilon = obj_99_epsilon_0_to_fp16, gamma = obj_99_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor<string, []>("obj_99_cast_fp16")];
+            tensor<int32, [2]> var_1640 = const()[name = tensor<string, []>("op_1640"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1642 = const()[name = tensor<string, []>("op_1642"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_29_pad_type_0 = const()[name = tensor<string, []>("query_29_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_29_pad_0 = const()[name = tensor<string, []>("query_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212673472)))];
+            tensor<fp16, [768]> layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213853184)))];
+            tensor<fp16, [1, 768, 1, 1]> query_29_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_bias_to_fp16, dilations = var_1642, groups = var_1605, pad = query_29_pad_0, pad_type = query_29_pad_type_0, strides = var_1640, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor<string, []>("query_29_cast_fp16")];
+            tensor<int32, [2]> var_1646 = const()[name = tensor<string, []>("op_1646"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1648 = const()[name = tensor<string, []>("op_1648"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_15_pad_type_0 = const()[name = tensor<string, []>("current_key_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_15_pad_0 = const()[name = tensor<string, []>("current_key_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213854784)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_15_cast_fp16 = conv(dilations = var_1648, groups = var_1605, pad = current_key_15_pad_0, pad_type = current_key_15_pad_type_0, strides = var_1646, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor<string, []>("current_key_15_cast_fp16")];
+            tensor<int32, [2]> var_1653 = const()[name = tensor<string, []>("op_1653"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1655 = const()[name = tensor<string, []>("op_1655"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_15_pad_type_0 = const()[name = tensor<string, []>("current_value_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_15_pad_0 = const()[name = tensor<string, []>("current_value_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215034496)))];
+            tensor<fp16, [768]> layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216214208)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_15_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_bias_to_fp16, dilations = var_1655, groups = var_1605, pad = current_value_15_pad_0, pad_type = current_value_15_pad_type_0, strides = var_1653, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = obj_99_cast_fp16)[name = tensor<string, []>("current_value_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1662_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1662_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1664_cast_fp16 = mul(x = var_63_cast_fp16_7, y = var_161_cast_fp16)[name = tensor<string, []>("op_1664_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_29_cast_fp16 = add(x = var_1662_cast_fp16, y = var_1664_cast_fp16)[name = tensor<string, []>("key_29_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1666_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1668_cast_fp16 = mul(x = var_78_cast_fp16_7, y = var_161_cast_fp16)[name = tensor<string, []>("op_1668_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_29_cast_fp16 = add(x = var_1666_cast_fp16, y = var_1668_cast_fp16)[name = tensor<string, []>("value_29_cast_fp16")];
+            tensor<int32, [4]> var_1671 = const()[name = tensor<string, []>("op_1671"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1672_cast_fp16 = reshape(shape = var_1671, x = query_29_cast_fp16)[name = tensor<string, []>("op_1672_cast_fp16")];
+            tensor<fp16, []> var_1673_to_fp16 = const()[name = tensor<string, []>("op_1673_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1674_cast_fp16 = mul(x = var_1672_cast_fp16, y = var_1673_to_fp16)[name = tensor<string, []>("op_1674_cast_fp16")];
+            tensor<int32, [4]> var_1675 = const()[name = tensor<string, []>("op_1675"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1676_cast_fp16 = reshape(shape = var_1675, x = key_29_cast_fp16)[name = tensor<string, []>("op_1676_cast_fp16")];
+            tensor<bool, []> mh_w_43_transpose_x_0 = const()[name = tensor<string, []>("mh_w_43_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_43_transpose_y_0 = const()[name = tensor<string, []>("mh_w_43_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_1674_cast_fp16, y = var_1676_cast_fp16)[name = tensor<string, []>("mh_w_43_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_45_cast_fp16 = add(x = mh_w_43_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_45_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_1684_cast_fp16 = softmax(axis = var_1598, x = mh_w_45_cast_fp16)[name = tensor<string, []>("op_1684_cast_fp16")];
+            tensor<int32, [4]> var_1685 = const()[name = tensor<string, []>("op_1685"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1686_cast_fp16 = reshape(shape = var_1685, x = value_29_cast_fp16)[name = tensor<string, []>("op_1686_cast_fp16")];
+            tensor<bool, []> attn_29_transpose_x_0 = const()[name = tensor<string, []>("attn_29_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_29_transpose_y_0 = const()[name = tensor<string, []>("attn_29_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_1686_cast_fp16, y = var_1684_cast_fp16)[name = tensor<string, []>("attn_29_cast_fp16")];
+            tensor<int32, [4]> var_1689 = const()[name = tensor<string, []>("op_1689"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_71_cast_fp16 = reshape(shape = var_1689, x = attn_29_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<int32, [2]> var_1693 = const()[name = tensor<string, []>("op_1693"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1695 = const()[name = tensor<string, []>("op_1695"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_105_pad_type_0 = const()[name = tensor<string, []>("obj_105_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_105_pad_0 = const()[name = tensor<string, []>("obj_105_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216215808)))];
+            tensor<fp16, [768]> layers_7_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217395520)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_105_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_bias_to_fp16, dilations = var_1695, groups = var_1605, pad = obj_105_pad_0, pad_type = obj_105_pad_type_0, strides = var_1693, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("obj_105_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_105_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, [1]> var_1705 = const()[name = tensor<string, []>("op_1705"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_45_cast_fp16 = reduce_mean(axes = var_1705, keep_dims = var_1606, x = inputs_45_cast_fp16)[name = tensor<string, []>("channels_mean_45_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_45_cast_fp16 = sub(x = inputs_45_cast_fp16, y = channels_mean_45_cast_fp16)[name = tensor<string, []>("zero_mean_45_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = zero_mean_45_cast_fp16)[name = tensor<string, []>("zero_mean_sq_45_cast_fp16")];
+            tensor<int32, [1]> var_1709 = const()[name = tensor<string, []>("op_1709"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1710_cast_fp16 = reduce_mean(axes = var_1709, keep_dims = var_1606, x = zero_mean_sq_45_cast_fp16)[name = tensor<string, []>("op_1710_cast_fp16")];
+            tensor<fp16, []> var_1711_to_fp16 = const()[name = tensor<string, []>("op_1711_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1712_cast_fp16 = add(x = var_1710_cast_fp16, y = var_1711_to_fp16)[name = tensor<string, []>("op_1712_cast_fp16")];
+            tensor<fp16, []> denom_45_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_45_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0_to_fp16, x = var_1712_cast_fp16)[name = tensor<string, []>("denom_45_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = denom_45_cast_fp16)[name = tensor<string, []>("out_45_cast_fp16")];
+            tensor<fp16, [768]> obj_107_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_107_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217397120)))];
+            tensor<fp16, [768]> obj_107_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_107_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217398720)))];
+            tensor<fp16, []> obj_107_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_107_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_107_cast_fp16 = batch_norm(beta = obj_107_beta_0_to_fp16, epsilon = obj_107_epsilon_0_to_fp16, gamma = obj_107_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor<string, []>("obj_107_cast_fp16")];
+            tensor<int32, [2]> var_1727 = const()[name = tensor<string, []>("op_1727"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1729 = const()[name = tensor<string, []>("op_1729"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_31_pad_type_0 = const()[name = tensor<string, []>("query_31_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_31_pad_0 = const()[name = tensor<string, []>("query_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217400320)))];
+            tensor<fp16, [768]> layers_7_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(218580032)))];
+            tensor<fp16, [1, 768, 1, 1]> query_31_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_bias_to_fp16, dilations = var_1729, groups = var_1605, pad = query_31_pad_0, pad_type = query_31_pad_type_0, strides = var_1727, weight = layers_7_encoder_attn_q_proj_weight_to_fp16, x = obj_107_cast_fp16)[name = tensor<string, []>("query_31_cast_fp16")];
+            tensor<int32, [2]> var_1733 = const()[name = tensor<string, []>("op_1733"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1735 = const()[name = tensor<string, []>("op_1735"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_31_pad_type_0 = const()[name = tensor<string, []>("key_31_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_31_pad_0 = const()[name = tensor<string, []>("key_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(218581632)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_31_cast_fp16 = conv(dilations = var_1735, groups = var_1605, pad = key_31_pad_0, pad_type = key_31_pad_type_0, strides = var_1733, weight = layers_7_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_31_cast_fp16")];
+            tensor<int32, [2]> var_1740 = const()[name = tensor<string, []>("op_1740"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1742 = const()[name = tensor<string, []>("op_1742"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_31_pad_type_0 = const()[name = tensor<string, []>("value_31_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_31_pad_0 = const()[name = tensor<string, []>("value_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219761344)))];
+            tensor<fp16, [768]> layers_7_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(220941056)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_31_cast_fp16 = conv(bias = layers_7_encoder_attn_v_proj_bias_to_fp16, dilations = var_1742, groups = var_1605, pad = value_31_pad_0, pad_type = value_31_pad_type_0, strides = var_1740, weight = layers_7_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_31_cast_fp16")];
+            tensor<int32, [4]> var_1746 = const()[name = tensor<string, []>("op_1746"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1747_cast_fp16 = reshape(shape = var_1746, x = query_31_cast_fp16)[name = tensor<string, []>("op_1747_cast_fp16")];
+            tensor<fp16, []> var_1748_to_fp16 = const()[name = tensor<string, []>("op_1748_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1749_cast_fp16 = mul(x = var_1747_cast_fp16, y = var_1748_to_fp16)[name = tensor<string, []>("op_1749_cast_fp16")];
+            tensor<int32, [4]> var_1750 = const()[name = tensor<string, []>("op_1750"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1751_cast_fp16 = reshape(shape = var_1750, x = key_31_cast_fp16)[name = tensor<string, []>("op_1751_cast_fp16")];
+            tensor<bool, []> mh_w_47_transpose_x_0 = const()[name = tensor<string, []>("mh_w_47_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_47_transpose_y_0 = const()[name = tensor<string, []>("mh_w_47_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_47_cast_fp16 = matmul(transpose_x = mh_w_47_transpose_x_0, transpose_y = mh_w_47_transpose_y_0, x = var_1749_cast_fp16, y = var_1751_cast_fp16)[name = tensor<string, []>("mh_w_47_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_111_cast_fp16 = softmax(axis = var_1598, x = mh_w_47_cast_fp16)[name = tensor<string, []>("obj_111_cast_fp16")];
+            tensor<int32, [4]> var_1755 = const()[name = tensor<string, []>("op_1755"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1756_cast_fp16 = reshape(shape = var_1755, x = value_31_cast_fp16)[name = tensor<string, []>("op_1756_cast_fp16")];
+            tensor<bool, []> attn_31_transpose_x_0 = const()[name = tensor<string, []>("attn_31_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_31_transpose_y_0 = const()[name = tensor<string, []>("attn_31_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_1756_cast_fp16, y = obj_111_cast_fp16)[name = tensor<string, []>("attn_31_cast_fp16")];
+            tensor<int32, [4]> var_1759 = const()[name = tensor<string, []>("op_1759"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_73_cast_fp16 = reshape(shape = var_1759, x = attn_31_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<int32, [2]> var_1763 = const()[name = tensor<string, []>("op_1763"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1765 = const()[name = tensor<string, []>("op_1765"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_109_pad_type_0 = const()[name = tensor<string, []>("obj_109_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_109_pad_0 = const()[name = tensor<string, []>("obj_109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(220942656)))];
+            tensor<fp16, [768]> layers_7_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(222122368)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_109_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_bias_to_fp16, dilations = var_1765, groups = var_1605, pad = obj_109_pad_0, pad_type = obj_109_pad_type_0, strides = var_1763, weight = layers_7_encoder_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("obj_109_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_109_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> var_1771 = const()[name = tensor<string, []>("op_1771"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_47_cast_fp16 = reduce_mean(axes = var_1771, keep_dims = var_1606, x = inputs_47_cast_fp16)[name = tensor<string, []>("channels_mean_47_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_47_cast_fp16 = sub(x = inputs_47_cast_fp16, y = channels_mean_47_cast_fp16)[name = tensor<string, []>("zero_mean_47_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = zero_mean_47_cast_fp16)[name = tensor<string, []>("zero_mean_sq_47_cast_fp16")];
+            tensor<int32, [1]> var_1775 = const()[name = tensor<string, []>("op_1775"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1776_cast_fp16 = reduce_mean(axes = var_1775, keep_dims = var_1606, x = zero_mean_sq_47_cast_fp16)[name = tensor<string, []>("op_1776_cast_fp16")];
+            tensor<fp16, []> var_1777_to_fp16 = const()[name = tensor<string, []>("op_1777_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1778_cast_fp16 = add(x = var_1776_cast_fp16, y = var_1777_to_fp16)[name = tensor<string, []>("op_1778_cast_fp16")];
+            tensor<fp16, []> denom_47_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_47_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0_to_fp16, x = var_1778_cast_fp16)[name = tensor<string, []>("denom_47_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = denom_47_cast_fp16)[name = tensor<string, []>("out_47_cast_fp16")];
+            tensor<fp16, [768]> input_75_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_75_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(222123968)))];
+            tensor<fp16, [768]> input_75_beta_0_to_fp16 = const()[name = tensor<string, []>("input_75_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(222125568)))];
+            tensor<fp16, []> input_75_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_75_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<int32, [2]> var_1789 = const()[name = tensor<string, []>("op_1789"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1791 = const()[name = tensor<string, []>("op_1791"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_77_pad_type_0 = const()[name = tensor<string, []>("input_77_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_77_pad_0 = const()[name = tensor<string, []>("input_77_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(222127168)))];
+            tensor<fp16, [3072]> layers_7_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226845824)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_77_cast_fp16 = conv(bias = layers_7_fc1_bias_to_fp16, dilations = var_1791, groups = var_1605, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = var_1789, weight = layers_7_fc1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_mode_0 = const()[name = tensor<string, []>("input_79_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<int32, [2]> var_1797 = const()[name = tensor<string, []>("op_1797"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1799 = const()[name = tensor<string, []>("op_1799"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_17_pad_type_0 = const()[name = tensor<string, []>("hidden_states_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_17_pad_0 = const()[name = tensor<string, []>("hidden_states_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226852032)))];
+            tensor<fp16, [768]> layers_7_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_7_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231570688)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_17_cast_fp16 = conv(bias = layers_7_fc2_bias_to_fp16, dilations = var_1799, groups = var_1605, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = var_1797, weight = layers_7_fc2_weight_to_fp16, x = input_79_cast_fp16)[name = tensor<string, []>("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor<string, []>("inputs_49_cast_fp16")];
+            tensor<int32, []> var_1812 = const()[name = tensor<string, []>("op_1812"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1819 = const()[name = tensor<string, []>("op_1819"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1820 = const()[name = tensor<string, []>("op_1820"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_1832 = const()[name = tensor<string, []>("op_1832"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_49_cast_fp16 = reduce_mean(axes = var_1832, keep_dims = var_1820, x = inputs_49_cast_fp16)[name = tensor<string, []>("channels_mean_49_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_49_cast_fp16 = sub(x = inputs_49_cast_fp16, y = channels_mean_49_cast_fp16)[name = tensor<string, []>("zero_mean_49_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_49_cast_fp16 = mul(x = zero_mean_49_cast_fp16, y = zero_mean_49_cast_fp16)[name = tensor<string, []>("zero_mean_sq_49_cast_fp16")];
+            tensor<int32, [1]> var_1836 = const()[name = tensor<string, []>("op_1836"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1837_cast_fp16 = reduce_mean(axes = var_1836, keep_dims = var_1820, x = zero_mean_sq_49_cast_fp16)[name = tensor<string, []>("op_1837_cast_fp16")];
+            tensor<fp16, []> var_1838_to_fp16 = const()[name = tensor<string, []>("op_1838_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1839_cast_fp16 = add(x = var_1837_cast_fp16, y = var_1838_to_fp16)[name = tensor<string, []>("op_1839_cast_fp16")];
+            tensor<fp16, []> denom_49_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_49_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_49_cast_fp16 = rsqrt(epsilon = denom_49_epsilon_0_to_fp16, x = var_1839_cast_fp16)[name = tensor<string, []>("denom_49_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_49_cast_fp16 = mul(x = zero_mean_49_cast_fp16, y = denom_49_cast_fp16)[name = tensor<string, []>("out_49_cast_fp16")];
+            tensor<fp16, [768]> obj_113_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_113_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231572288)))];
+            tensor<fp16, [768]> obj_113_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_113_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231573888)))];
+            tensor<fp16, []> obj_113_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_113_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor<string, []>("obj_113_cast_fp16")];
+            tensor<int32, [2]> var_1854 = const()[name = tensor<string, []>("op_1854"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1856 = const()[name = tensor<string, []>("op_1856"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_33_pad_type_0 = const()[name = tensor<string, []>("query_33_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_33_pad_0 = const()[name = tensor<string, []>("query_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231575488)))];
+            tensor<fp16, [768]> layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(232755200)))];
+            tensor<fp16, [1, 768, 1, 1]> query_33_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_bias_to_fp16, dilations = var_1856, groups = var_1819, pad = query_33_pad_0, pad_type = query_33_pad_type_0, strides = var_1854, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor<string, []>("query_33_cast_fp16")];
+            tensor<int32, [2]> var_1860 = const()[name = tensor<string, []>("op_1860"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1862 = const()[name = tensor<string, []>("op_1862"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_17_pad_type_0 = const()[name = tensor<string, []>("current_key_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_17_pad_0 = const()[name = tensor<string, []>("current_key_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(232756800)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_17_cast_fp16 = conv(dilations = var_1862, groups = var_1819, pad = current_key_17_pad_0, pad_type = current_key_17_pad_type_0, strides = var_1860, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor<string, []>("current_key_17_cast_fp16")];
+            tensor<int32, [2]> var_1867 = const()[name = tensor<string, []>("op_1867"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1869 = const()[name = tensor<string, []>("op_1869"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_17_pad_type_0 = const()[name = tensor<string, []>("current_value_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_17_pad_0 = const()[name = tensor<string, []>("current_value_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(233936512)))];
+            tensor<fp16, [768]> layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235116224)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_17_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_bias_to_fp16, dilations = var_1869, groups = var_1819, pad = current_value_17_pad_0, pad_type = current_value_17_pad_type_0, strides = var_1867, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor<string, []>("current_value_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1876_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1876_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1878_cast_fp16 = mul(x = var_63_cast_fp16_8, y = var_161_cast_fp16)[name = tensor<string, []>("op_1878_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_33_cast_fp16 = add(x = var_1876_cast_fp16, y = var_1878_cast_fp16)[name = tensor<string, []>("key_33_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1880_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_1880_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_1882_cast_fp16 = mul(x = var_78_cast_fp16_8, y = var_161_cast_fp16)[name = tensor<string, []>("op_1882_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_33_cast_fp16 = add(x = var_1880_cast_fp16, y = var_1882_cast_fp16)[name = tensor<string, []>("value_33_cast_fp16")];
+            tensor<int32, [4]> var_1885 = const()[name = tensor<string, []>("op_1885"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1886_cast_fp16 = reshape(shape = var_1885, x = query_33_cast_fp16)[name = tensor<string, []>("op_1886_cast_fp16")];
+            tensor<fp16, []> var_1887_to_fp16 = const()[name = tensor<string, []>("op_1887_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1888_cast_fp16 = mul(x = var_1886_cast_fp16, y = var_1887_to_fp16)[name = tensor<string, []>("op_1888_cast_fp16")];
+            tensor<int32, [4]> var_1889 = const()[name = tensor<string, []>("op_1889"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1890_cast_fp16 = reshape(shape = var_1889, x = key_33_cast_fp16)[name = tensor<string, []>("op_1890_cast_fp16")];
+            tensor<bool, []> mh_w_49_transpose_x_0 = const()[name = tensor<string, []>("mh_w_49_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_49_transpose_y_0 = const()[name = tensor<string, []>("mh_w_49_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_1888_cast_fp16, y = var_1890_cast_fp16)[name = tensor<string, []>("mh_w_49_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_51_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_1898_cast_fp16 = softmax(axis = var_1812, x = mh_w_51_cast_fp16)[name = tensor<string, []>("op_1898_cast_fp16")];
+            tensor<int32, [4]> var_1899 = const()[name = tensor<string, []>("op_1899"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_1900_cast_fp16 = reshape(shape = var_1899, x = value_33_cast_fp16)[name = tensor<string, []>("op_1900_cast_fp16")];
+            tensor<bool, []> attn_33_transpose_x_0 = const()[name = tensor<string, []>("attn_33_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_33_transpose_y_0 = const()[name = tensor<string, []>("attn_33_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_1900_cast_fp16, y = var_1898_cast_fp16)[name = tensor<string, []>("attn_33_cast_fp16")];
+            tensor<int32, [4]> var_1903 = const()[name = tensor<string, []>("op_1903"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_81_cast_fp16 = reshape(shape = var_1903, x = attn_33_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<int32, [2]> var_1907 = const()[name = tensor<string, []>("op_1907"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1909 = const()[name = tensor<string, []>("op_1909"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_119_pad_type_0 = const()[name = tensor<string, []>("obj_119_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_119_pad_0 = const()[name = tensor<string, []>("obj_119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235117824)))];
+            tensor<fp16, [768]> layers_8_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236297536)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_119_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_bias_to_fp16, dilations = var_1909, groups = var_1819, pad = obj_119_pad_0, pad_type = obj_119_pad_type_0, strides = var_1907, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("obj_119_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_119_cast_fp16)[name = tensor<string, []>("inputs_51_cast_fp16")];
+            tensor<int32, [1]> var_1919 = const()[name = tensor<string, []>("op_1919"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_51_cast_fp16 = reduce_mean(axes = var_1919, keep_dims = var_1820, x = inputs_51_cast_fp16)[name = tensor<string, []>("channels_mean_51_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_51_cast_fp16 = sub(x = inputs_51_cast_fp16, y = channels_mean_51_cast_fp16)[name = tensor<string, []>("zero_mean_51_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_51_cast_fp16 = mul(x = zero_mean_51_cast_fp16, y = zero_mean_51_cast_fp16)[name = tensor<string, []>("zero_mean_sq_51_cast_fp16")];
+            tensor<int32, [1]> var_1923 = const()[name = tensor<string, []>("op_1923"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1924_cast_fp16 = reduce_mean(axes = var_1923, keep_dims = var_1820, x = zero_mean_sq_51_cast_fp16)[name = tensor<string, []>("op_1924_cast_fp16")];
+            tensor<fp16, []> var_1925_to_fp16 = const()[name = tensor<string, []>("op_1925_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1926_cast_fp16 = add(x = var_1924_cast_fp16, y = var_1925_to_fp16)[name = tensor<string, []>("op_1926_cast_fp16")];
+            tensor<fp16, []> denom_51_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_51_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_51_cast_fp16 = rsqrt(epsilon = denom_51_epsilon_0_to_fp16, x = var_1926_cast_fp16)[name = tensor<string, []>("denom_51_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_51_cast_fp16 = mul(x = zero_mean_51_cast_fp16, y = denom_51_cast_fp16)[name = tensor<string, []>("out_51_cast_fp16")];
+            tensor<fp16, [768]> obj_121_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_121_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236299136)))];
+            tensor<fp16, [768]> obj_121_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_121_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236300736)))];
+            tensor<fp16, []> obj_121_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_121_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = tensor<string, []>("obj_121_cast_fp16")];
+            tensor<int32, [2]> var_1941 = const()[name = tensor<string, []>("op_1941"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1943 = const()[name = tensor<string, []>("op_1943"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_35_pad_type_0 = const()[name = tensor<string, []>("query_35_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_35_pad_0 = const()[name = tensor<string, []>("query_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236302336)))];
+            tensor<fp16, [768]> layers_8_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(237482048)))];
+            tensor<fp16, [1, 768, 1, 1]> query_35_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_bias_to_fp16, dilations = var_1943, groups = var_1819, pad = query_35_pad_0, pad_type = query_35_pad_type_0, strides = var_1941, weight = layers_8_encoder_attn_q_proj_weight_to_fp16, x = obj_121_cast_fp16)[name = tensor<string, []>("query_35_cast_fp16")];
+            tensor<int32, [2]> var_1947 = const()[name = tensor<string, []>("op_1947"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1949 = const()[name = tensor<string, []>("op_1949"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_35_pad_type_0 = const()[name = tensor<string, []>("key_35_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_35_pad_0 = const()[name = tensor<string, []>("key_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(237483648)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_35_cast_fp16 = conv(dilations = var_1949, groups = var_1819, pad = key_35_pad_0, pad_type = key_35_pad_type_0, strides = var_1947, weight = layers_8_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_35_cast_fp16")];
+            tensor<int32, [2]> var_1954 = const()[name = tensor<string, []>("op_1954"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1956 = const()[name = tensor<string, []>("op_1956"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_35_pad_type_0 = const()[name = tensor<string, []>("value_35_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_35_pad_0 = const()[name = tensor<string, []>("value_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238663360)))];
+            tensor<fp16, [768]> layers_8_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(239843072)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_35_cast_fp16 = conv(bias = layers_8_encoder_attn_v_proj_bias_to_fp16, dilations = var_1956, groups = var_1819, pad = value_35_pad_0, pad_type = value_35_pad_type_0, strides = var_1954, weight = layers_8_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_35_cast_fp16")];
+            tensor<int32, [4]> var_1960 = const()[name = tensor<string, []>("op_1960"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_1961_cast_fp16 = reshape(shape = var_1960, x = query_35_cast_fp16)[name = tensor<string, []>("op_1961_cast_fp16")];
+            tensor<fp16, []> var_1962_to_fp16 = const()[name = tensor<string, []>("op_1962_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1963_cast_fp16 = mul(x = var_1961_cast_fp16, y = var_1962_to_fp16)[name = tensor<string, []>("op_1963_cast_fp16")];
+            tensor<int32, [4]> var_1964 = const()[name = tensor<string, []>("op_1964"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1965_cast_fp16 = reshape(shape = var_1964, x = key_35_cast_fp16)[name = tensor<string, []>("op_1965_cast_fp16")];
+            tensor<bool, []> mh_w_53_transpose_x_0 = const()[name = tensor<string, []>("mh_w_53_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_53_transpose_y_0 = const()[name = tensor<string, []>("mh_w_53_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_1963_cast_fp16, y = var_1965_cast_fp16)[name = tensor<string, []>("mh_w_53_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_125_cast_fp16 = softmax(axis = var_1812, x = mh_w_53_cast_fp16)[name = tensor<string, []>("obj_125_cast_fp16")];
+            tensor<int32, [4]> var_1969 = const()[name = tensor<string, []>("op_1969"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1970_cast_fp16 = reshape(shape = var_1969, x = value_35_cast_fp16)[name = tensor<string, []>("op_1970_cast_fp16")];
+            tensor<bool, []> attn_35_transpose_x_0 = const()[name = tensor<string, []>("attn_35_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_35_transpose_y_0 = const()[name = tensor<string, []>("attn_35_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_1970_cast_fp16, y = obj_125_cast_fp16)[name = tensor<string, []>("attn_35_cast_fp16")];
+            tensor<int32, [4]> var_1973 = const()[name = tensor<string, []>("op_1973"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_83_cast_fp16 = reshape(shape = var_1973, x = attn_35_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<int32, [2]> var_1977 = const()[name = tensor<string, []>("op_1977"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1979 = const()[name = tensor<string, []>("op_1979"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_123_pad_type_0 = const()[name = tensor<string, []>("obj_123_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_123_pad_0 = const()[name = tensor<string, []>("obj_123_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(239844672)))];
+            tensor<fp16, [768]> layers_8_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241024384)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_123_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_bias_to_fp16, dilations = var_1979, groups = var_1819, pad = obj_123_pad_0, pad_type = obj_123_pad_type_0, strides = var_1977, weight = layers_8_encoder_attn_o_proj_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("obj_123_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_123_cast_fp16)[name = tensor<string, []>("inputs_53_cast_fp16")];
+            tensor<int32, [1]> var_1988 = const()[name = tensor<string, []>("op_1988"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_53_cast_fp16 = reduce_mean(axes = var_1988, keep_dims = var_1820, x = inputs_53_cast_fp16)[name = tensor<string, []>("channels_mean_53_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_53_cast_fp16 = sub(x = inputs_53_cast_fp16, y = channels_mean_53_cast_fp16)[name = tensor<string, []>("zero_mean_53_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_53_cast_fp16 = mul(x = zero_mean_53_cast_fp16, y = zero_mean_53_cast_fp16)[name = tensor<string, []>("zero_mean_sq_53_cast_fp16")];
+            tensor<int32, [1]> var_1992 = const()[name = tensor<string, []>("op_1992"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_1993_cast_fp16 = reduce_mean(axes = var_1992, keep_dims = var_1820, x = zero_mean_sq_53_cast_fp16)[name = tensor<string, []>("op_1993_cast_fp16")];
+            tensor<fp16, []> var_1994_to_fp16 = const()[name = tensor<string, []>("op_1994_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_1995_cast_fp16 = add(x = var_1993_cast_fp16, y = var_1994_to_fp16)[name = tensor<string, []>("op_1995_cast_fp16")];
+            tensor<fp16, []> denom_53_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_53_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_53_cast_fp16 = rsqrt(epsilon = denom_53_epsilon_0_to_fp16, x = var_1995_cast_fp16)[name = tensor<string, []>("denom_53_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_53_cast_fp16 = mul(x = zero_mean_53_cast_fp16, y = denom_53_cast_fp16)[name = tensor<string, []>("out_53_cast_fp16")];
+            tensor<fp16, [768]> input_85_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_85_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241025984)))];
+            tensor<fp16, [768]> input_85_beta_0_to_fp16 = const()[name = tensor<string, []>("input_85_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241027584)))];
+            tensor<fp16, []> input_85_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_85_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<int32, [2]> var_2006 = const()[name = tensor<string, []>("op_2006"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2008 = const()[name = tensor<string, []>("op_2008"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_87_pad_type_0 = const()[name = tensor<string, []>("input_87_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_87_pad_0 = const()[name = tensor<string, []>("input_87_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241029184)))];
+            tensor<fp16, [3072]> layers_8_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245747840)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_87_cast_fp16 = conv(bias = layers_8_fc1_bias_to_fp16, dilations = var_2008, groups = var_1819, pad = input_87_pad_0, pad_type = input_87_pad_type_0, strides = var_2006, weight = layers_8_fc1_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<string, []> input_89_mode_0 = const()[name = tensor<string, []>("input_89_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<int32, [2]> var_2014 = const()[name = tensor<string, []>("op_2014"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2016 = const()[name = tensor<string, []>("op_2016"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_19_pad_type_0 = const()[name = tensor<string, []>("hidden_states_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_19_pad_0 = const()[name = tensor<string, []>("hidden_states_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245754048)))];
+            tensor<fp16, [768]> layers_8_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_8_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250472704)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_19_cast_fp16 = conv(bias = layers_8_fc2_bias_to_fp16, dilations = var_2016, groups = var_1819, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = var_2014, weight = layers_8_fc2_weight_to_fp16, x = input_89_cast_fp16)[name = tensor<string, []>("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor<string, []>("inputs_55_cast_fp16")];
+            tensor<int32, []> var_2030 = const()[name = tensor<string, []>("op_2030"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_2037 = const()[name = tensor<string, []>("op_2037"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_2038 = const()[name = tensor<string, []>("op_2038"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_2050 = const()[name = tensor<string, []>("op_2050"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_55_cast_fp16 = reduce_mean(axes = var_2050, keep_dims = var_2038, x = inputs_55_cast_fp16)[name = tensor<string, []>("channels_mean_55_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_55_cast_fp16 = sub(x = inputs_55_cast_fp16, y = channels_mean_55_cast_fp16)[name = tensor<string, []>("zero_mean_55_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_55_cast_fp16 = mul(x = zero_mean_55_cast_fp16, y = zero_mean_55_cast_fp16)[name = tensor<string, []>("zero_mean_sq_55_cast_fp16")];
+            tensor<int32, [1]> var_2054 = const()[name = tensor<string, []>("op_2054"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2055_cast_fp16 = reduce_mean(axes = var_2054, keep_dims = var_2038, x = zero_mean_sq_55_cast_fp16)[name = tensor<string, []>("op_2055_cast_fp16")];
+            tensor<fp16, []> var_2056_to_fp16 = const()[name = tensor<string, []>("op_2056_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2057_cast_fp16 = add(x = var_2055_cast_fp16, y = var_2056_to_fp16)[name = tensor<string, []>("op_2057_cast_fp16")];
+            tensor<fp16, []> denom_55_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_55_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_55_cast_fp16 = rsqrt(epsilon = denom_55_epsilon_0_to_fp16, x = var_2057_cast_fp16)[name = tensor<string, []>("denom_55_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_55_cast_fp16 = mul(x = zero_mean_55_cast_fp16, y = denom_55_cast_fp16)[name = tensor<string, []>("out_55_cast_fp16")];
+            tensor<fp16, [768]> obj_127_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_127_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250474304)))];
+            tensor<fp16, [768]> obj_127_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_127_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250475904)))];
+            tensor<fp16, []> obj_127_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_127_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_127_cast_fp16 = batch_norm(beta = obj_127_beta_0_to_fp16, epsilon = obj_127_epsilon_0_to_fp16, gamma = obj_127_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor<string, []>("obj_127_cast_fp16")];
+            tensor<int32, [2]> var_2072 = const()[name = tensor<string, []>("op_2072"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2074 = const()[name = tensor<string, []>("op_2074"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_37_pad_type_0 = const()[name = tensor<string, []>("query_37_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_37_pad_0 = const()[name = tensor<string, []>("query_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250477504)))];
+            tensor<fp16, [768]> layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(251657216)))];
+            tensor<fp16, [1, 768, 1, 1]> query_37_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_bias_to_fp16, dilations = var_2074, groups = var_2037, pad = query_37_pad_0, pad_type = query_37_pad_type_0, strides = var_2072, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor<string, []>("query_37_cast_fp16")];
+            tensor<int32, [2]> var_2078 = const()[name = tensor<string, []>("op_2078"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2080 = const()[name = tensor<string, []>("op_2080"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_19_pad_type_0 = const()[name = tensor<string, []>("current_key_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_19_pad_0 = const()[name = tensor<string, []>("current_key_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(251658816)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_19_cast_fp16 = conv(dilations = var_2080, groups = var_2037, pad = current_key_19_pad_0, pad_type = current_key_19_pad_type_0, strides = var_2078, weight = layers_9_self_attn_k_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor<string, []>("current_key_19_cast_fp16")];
+            tensor<int32, [2]> var_2085 = const()[name = tensor<string, []>("op_2085"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2087 = const()[name = tensor<string, []>("op_2087"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_19_pad_type_0 = const()[name = tensor<string, []>("current_value_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_19_pad_0 = const()[name = tensor<string, []>("current_value_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(252838528)))];
+            tensor<fp16, [768]> layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254018240)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_19_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_bias_to_fp16, dilations = var_2087, groups = var_2037, pad = current_value_19_pad_0, pad_type = current_value_19_pad_type_0, strides = var_2085, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = obj_127_cast_fp16)[name = tensor<string, []>("current_value_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2094_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_2094_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2096_cast_fp16 = mul(x = var_63_cast_fp16_9, y = var_161_cast_fp16)[name = tensor<string, []>("op_2096_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_37_cast_fp16 = add(x = var_2094_cast_fp16, y = var_2096_cast_fp16)[name = tensor<string, []>("key_37_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2098_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_2098_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2100_cast_fp16 = mul(x = var_78_cast_fp16_9, y = var_161_cast_fp16)[name = tensor<string, []>("op_2100_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_37_cast_fp16 = add(x = var_2098_cast_fp16, y = var_2100_cast_fp16)[name = tensor<string, []>("value_37_cast_fp16")];
+            tensor<int32, [4]> var_2103 = const()[name = tensor<string, []>("op_2103"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_2104_cast_fp16 = reshape(shape = var_2103, x = query_37_cast_fp16)[name = tensor<string, []>("op_2104_cast_fp16")];
+            tensor<fp16, []> var_2105_to_fp16 = const()[name = tensor<string, []>("op_2105_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2106_cast_fp16 = mul(x = var_2104_cast_fp16, y = var_2105_to_fp16)[name = tensor<string, []>("op_2106_cast_fp16")];
+            tensor<int32, [4]> var_2107 = const()[name = tensor<string, []>("op_2107"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_2108_cast_fp16 = reshape(shape = var_2107, x = key_37_cast_fp16)[name = tensor<string, []>("op_2108_cast_fp16")];
+            tensor<bool, []> mh_w_55_transpose_x_0 = const()[name = tensor<string, []>("mh_w_55_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_55_transpose_y_0 = const()[name = tensor<string, []>("mh_w_55_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_55_cast_fp16 = matmul(transpose_x = mh_w_55_transpose_x_0, transpose_y = mh_w_55_transpose_y_0, x = var_2106_cast_fp16, y = var_2108_cast_fp16)[name = tensor<string, []>("mh_w_55_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_57_cast_fp16 = add(x = mh_w_55_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_57_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_2116_cast_fp16 = softmax(axis = var_2030, x = mh_w_57_cast_fp16)[name = tensor<string, []>("op_2116_cast_fp16")];
+            tensor<int32, [4]> var_2117 = const()[name = tensor<string, []>("op_2117"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_2118_cast_fp16 = reshape(shape = var_2117, x = value_37_cast_fp16)[name = tensor<string, []>("op_2118_cast_fp16")];
+            tensor<bool, []> attn_37_transpose_x_0 = const()[name = tensor<string, []>("attn_37_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_37_transpose_y_0 = const()[name = tensor<string, []>("attn_37_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2118_cast_fp16, y = var_2116_cast_fp16)[name = tensor<string, []>("attn_37_cast_fp16")];
+            tensor<int32, [4]> var_2121 = const()[name = tensor<string, []>("op_2121"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_91_cast_fp16 = reshape(shape = var_2121, x = attn_37_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<int32, [2]> var_2125 = const()[name = tensor<string, []>("op_2125"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2127 = const()[name = tensor<string, []>("op_2127"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_133_pad_type_0 = const()[name = tensor<string, []>("obj_133_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_133_pad_0 = const()[name = tensor<string, []>("obj_133_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254019840)))];
+            tensor<fp16, [768]> layers_9_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255199552)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_133_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_bias_to_fp16, dilations = var_2127, groups = var_2037, pad = obj_133_pad_0, pad_type = obj_133_pad_type_0, strides = var_2125, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("obj_133_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_133_cast_fp16)[name = tensor<string, []>("inputs_57_cast_fp16")];
+            tensor<int32, [1]> var_2137 = const()[name = tensor<string, []>("op_2137"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_57_cast_fp16 = reduce_mean(axes = var_2137, keep_dims = var_2038, x = inputs_57_cast_fp16)[name = tensor<string, []>("channels_mean_57_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_57_cast_fp16 = sub(x = inputs_57_cast_fp16, y = channels_mean_57_cast_fp16)[name = tensor<string, []>("zero_mean_57_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_57_cast_fp16 = mul(x = zero_mean_57_cast_fp16, y = zero_mean_57_cast_fp16)[name = tensor<string, []>("zero_mean_sq_57_cast_fp16")];
+            tensor<int32, [1]> var_2141 = const()[name = tensor<string, []>("op_2141"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2142_cast_fp16 = reduce_mean(axes = var_2141, keep_dims = var_2038, x = zero_mean_sq_57_cast_fp16)[name = tensor<string, []>("op_2142_cast_fp16")];
+            tensor<fp16, []> var_2143_to_fp16 = const()[name = tensor<string, []>("op_2143_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2144_cast_fp16 = add(x = var_2142_cast_fp16, y = var_2143_to_fp16)[name = tensor<string, []>("op_2144_cast_fp16")];
+            tensor<fp16, []> denom_57_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_57_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_57_cast_fp16 = rsqrt(epsilon = denom_57_epsilon_0_to_fp16, x = var_2144_cast_fp16)[name = tensor<string, []>("denom_57_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_57_cast_fp16 = mul(x = zero_mean_57_cast_fp16, y = denom_57_cast_fp16)[name = tensor<string, []>("out_57_cast_fp16")];
+            tensor<fp16, [768]> obj_135_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_135_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255201152)))];
+            tensor<fp16, [768]> obj_135_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_135_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255202752)))];
+            tensor<fp16, []> obj_135_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_135_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_135_cast_fp16 = batch_norm(beta = obj_135_beta_0_to_fp16, epsilon = obj_135_epsilon_0_to_fp16, gamma = obj_135_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor<string, []>("obj_135_cast_fp16")];
+            tensor<int32, [2]> var_2159 = const()[name = tensor<string, []>("op_2159"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2161 = const()[name = tensor<string, []>("op_2161"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_39_pad_type_0 = const()[name = tensor<string, []>("query_39_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_39_pad_0 = const()[name = tensor<string, []>("query_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255204352)))];
+            tensor<fp16, [768]> layers_9_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(256384064)))];
+            tensor<fp16, [1, 768, 1, 1]> query_39_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_bias_to_fp16, dilations = var_2161, groups = var_2037, pad = query_39_pad_0, pad_type = query_39_pad_type_0, strides = var_2159, weight = layers_9_encoder_attn_q_proj_weight_to_fp16, x = obj_135_cast_fp16)[name = tensor<string, []>("query_39_cast_fp16")];
+            tensor<int32, [2]> var_2165 = const()[name = tensor<string, []>("op_2165"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2167 = const()[name = tensor<string, []>("op_2167"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_39_pad_type_0 = const()[name = tensor<string, []>("key_39_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_39_pad_0 = const()[name = tensor<string, []>("key_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(256385664)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_39_cast_fp16 = conv(dilations = var_2167, groups = var_2037, pad = key_39_pad_0, pad_type = key_39_pad_type_0, strides = var_2165, weight = layers_9_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_39_cast_fp16")];
+            tensor<int32, [2]> var_2172 = const()[name = tensor<string, []>("op_2172"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2174 = const()[name = tensor<string, []>("op_2174"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_39_pad_type_0 = const()[name = tensor<string, []>("value_39_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_39_pad_0 = const()[name = tensor<string, []>("value_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257565376)))];
+            tensor<fp16, [768]> layers_9_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258745088)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_39_cast_fp16 = conv(bias = layers_9_encoder_attn_v_proj_bias_to_fp16, dilations = var_2174, groups = var_2037, pad = value_39_pad_0, pad_type = value_39_pad_type_0, strides = var_2172, weight = layers_9_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_39_cast_fp16")];
+            tensor<int32, [4]> var_2178 = const()[name = tensor<string, []>("op_2178"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_2179_cast_fp16 = reshape(shape = var_2178, x = query_39_cast_fp16)[name = tensor<string, []>("op_2179_cast_fp16")];
+            tensor<fp16, []> var_2180_to_fp16 = const()[name = tensor<string, []>("op_2180_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2181_cast_fp16 = mul(x = var_2179_cast_fp16, y = var_2180_to_fp16)[name = tensor<string, []>("op_2181_cast_fp16")];
+            tensor<int32, [4]> var_2182 = const()[name = tensor<string, []>("op_2182"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2183_cast_fp16 = reshape(shape = var_2182, x = key_39_cast_fp16)[name = tensor<string, []>("op_2183_cast_fp16")];
+            tensor<bool, []> mh_w_59_transpose_x_0 = const()[name = tensor<string, []>("mh_w_59_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_59_transpose_y_0 = const()[name = tensor<string, []>("mh_w_59_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_59_cast_fp16 = matmul(transpose_x = mh_w_59_transpose_x_0, transpose_y = mh_w_59_transpose_y_0, x = var_2181_cast_fp16, y = var_2183_cast_fp16)[name = tensor<string, []>("mh_w_59_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_139_cast_fp16 = softmax(axis = var_2030, x = mh_w_59_cast_fp16)[name = tensor<string, []>("obj_139_cast_fp16")];
+            tensor<int32, [4]> var_2187 = const()[name = tensor<string, []>("op_2187"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2188_cast_fp16 = reshape(shape = var_2187, x = value_39_cast_fp16)[name = tensor<string, []>("op_2188_cast_fp16")];
+            tensor<bool, []> attn_39_transpose_x_0 = const()[name = tensor<string, []>("attn_39_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_39_transpose_y_0 = const()[name = tensor<string, []>("attn_39_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2188_cast_fp16, y = obj_139_cast_fp16)[name = tensor<string, []>("attn_39_cast_fp16")];
+            tensor<int32, [4]> var_2191 = const()[name = tensor<string, []>("op_2191"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_93_cast_fp16 = reshape(shape = var_2191, x = attn_39_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<int32, [2]> var_2195 = const()[name = tensor<string, []>("op_2195"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2197 = const()[name = tensor<string, []>("op_2197"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_137_pad_type_0 = const()[name = tensor<string, []>("obj_137_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_137_pad_0 = const()[name = tensor<string, []>("obj_137_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258746688)))];
+            tensor<fp16, [768]> layers_9_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259926400)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_137_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_bias_to_fp16, dilations = var_2197, groups = var_2037, pad = obj_137_pad_0, pad_type = obj_137_pad_type_0, strides = var_2195, weight = layers_9_encoder_attn_o_proj_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("obj_137_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_137_cast_fp16)[name = tensor<string, []>("inputs_59_cast_fp16")];
+            tensor<int32, [1]> var_2206 = const()[name = tensor<string, []>("op_2206"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_59_cast_fp16 = reduce_mean(axes = var_2206, keep_dims = var_2038, x = inputs_59_cast_fp16)[name = tensor<string, []>("channels_mean_59_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_59_cast_fp16 = sub(x = inputs_59_cast_fp16, y = channels_mean_59_cast_fp16)[name = tensor<string, []>("zero_mean_59_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_59_cast_fp16 = mul(x = zero_mean_59_cast_fp16, y = zero_mean_59_cast_fp16)[name = tensor<string, []>("zero_mean_sq_59_cast_fp16")];
+            tensor<int32, [1]> var_2210 = const()[name = tensor<string, []>("op_2210"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2211_cast_fp16 = reduce_mean(axes = var_2210, keep_dims = var_2038, x = zero_mean_sq_59_cast_fp16)[name = tensor<string, []>("op_2211_cast_fp16")];
+            tensor<fp16, []> var_2212_to_fp16 = const()[name = tensor<string, []>("op_2212_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2213_cast_fp16 = add(x = var_2211_cast_fp16, y = var_2212_to_fp16)[name = tensor<string, []>("op_2213_cast_fp16")];
+            tensor<fp16, []> denom_59_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_59_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_59_cast_fp16 = rsqrt(epsilon = denom_59_epsilon_0_to_fp16, x = var_2213_cast_fp16)[name = tensor<string, []>("denom_59_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_59_cast_fp16 = mul(x = zero_mean_59_cast_fp16, y = denom_59_cast_fp16)[name = tensor<string, []>("out_59_cast_fp16")];
+            tensor<fp16, [768]> input_95_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_95_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259928000)))];
+            tensor<fp16, [768]> input_95_beta_0_to_fp16 = const()[name = tensor<string, []>("input_95_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259929600)))];
+            tensor<fp16, []> input_95_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_95_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<int32, [2]> var_2224 = const()[name = tensor<string, []>("op_2224"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2226 = const()[name = tensor<string, []>("op_2226"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_97_pad_type_0 = const()[name = tensor<string, []>("input_97_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_97_pad_0 = const()[name = tensor<string, []>("input_97_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259931200)))];
+            tensor<fp16, [3072]> layers_9_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(264649856)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_97_cast_fp16 = conv(bias = layers_9_fc1_bias_to_fp16, dilations = var_2226, groups = var_2037, pad = input_97_pad_0, pad_type = input_97_pad_type_0, strides = var_2224, weight = layers_9_fc1_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<string, []> input_99_mode_0 = const()[name = tensor<string, []>("input_99_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<int32, [2]> var_2232 = const()[name = tensor<string, []>("op_2232"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2234 = const()[name = tensor<string, []>("op_2234"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_21_pad_type_0 = const()[name = tensor<string, []>("hidden_states_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_21_pad_0 = const()[name = tensor<string, []>("hidden_states_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(264656064)))];
+            tensor<fp16, [768]> layers_9_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_9_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(269374720)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_21_cast_fp16 = conv(bias = layers_9_fc2_bias_to_fp16, dilations = var_2234, groups = var_2037, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = var_2232, weight = layers_9_fc2_weight_to_fp16, x = input_99_cast_fp16)[name = tensor<string, []>("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor<string, []>("inputs_61_cast_fp16")];
+            tensor<int32, []> var_2248 = const()[name = tensor<string, []>("op_2248"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_2255 = const()[name = tensor<string, []>("op_2255"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_2256 = const()[name = tensor<string, []>("op_2256"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_2268 = const()[name = tensor<string, []>("op_2268"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_61_cast_fp16 = reduce_mean(axes = var_2268, keep_dims = var_2256, x = inputs_61_cast_fp16)[name = tensor<string, []>("channels_mean_61_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_61_cast_fp16 = sub(x = inputs_61_cast_fp16, y = channels_mean_61_cast_fp16)[name = tensor<string, []>("zero_mean_61_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_61_cast_fp16 = mul(x = zero_mean_61_cast_fp16, y = zero_mean_61_cast_fp16)[name = tensor<string, []>("zero_mean_sq_61_cast_fp16")];
+            tensor<int32, [1]> var_2272 = const()[name = tensor<string, []>("op_2272"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2273_cast_fp16 = reduce_mean(axes = var_2272, keep_dims = var_2256, x = zero_mean_sq_61_cast_fp16)[name = tensor<string, []>("op_2273_cast_fp16")];
+            tensor<fp16, []> var_2274_to_fp16 = const()[name = tensor<string, []>("op_2274_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2275_cast_fp16 = add(x = var_2273_cast_fp16, y = var_2274_to_fp16)[name = tensor<string, []>("op_2275_cast_fp16")];
+            tensor<fp16, []> denom_61_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_61_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_61_cast_fp16 = rsqrt(epsilon = denom_61_epsilon_0_to_fp16, x = var_2275_cast_fp16)[name = tensor<string, []>("denom_61_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_61_cast_fp16 = mul(x = zero_mean_61_cast_fp16, y = denom_61_cast_fp16)[name = tensor<string, []>("out_61_cast_fp16")];
+            tensor<fp16, [768]> obj_141_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_141_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(269376320)))];
+            tensor<fp16, [768]> obj_141_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_141_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(269377920)))];
+            tensor<fp16, []> obj_141_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_141_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor<string, []>("obj_141_cast_fp16")];
+            tensor<int32, [2]> var_2290 = const()[name = tensor<string, []>("op_2290"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2292 = const()[name = tensor<string, []>("op_2292"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_41_pad_type_0 = const()[name = tensor<string, []>("query_41_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_41_pad_0 = const()[name = tensor<string, []>("query_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(269379520)))];
+            tensor<fp16, [768]> layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270559232)))];
+            tensor<fp16, [1, 768, 1, 1]> query_41_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_bias_to_fp16, dilations = var_2292, groups = var_2255, pad = query_41_pad_0, pad_type = query_41_pad_type_0, strides = var_2290, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor<string, []>("query_41_cast_fp16")];
+            tensor<int32, [2]> var_2296 = const()[name = tensor<string, []>("op_2296"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2298 = const()[name = tensor<string, []>("op_2298"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_21_pad_type_0 = const()[name = tensor<string, []>("current_key_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_21_pad_0 = const()[name = tensor<string, []>("current_key_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270560832)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_21_cast_fp16 = conv(dilations = var_2298, groups = var_2255, pad = current_key_21_pad_0, pad_type = current_key_21_pad_type_0, strides = var_2296, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor<string, []>("current_key_21_cast_fp16")];
+            tensor<int32, [2]> var_2303 = const()[name = tensor<string, []>("op_2303"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2305 = const()[name = tensor<string, []>("op_2305"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_21_pad_type_0 = const()[name = tensor<string, []>("current_value_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_21_pad_0 = const()[name = tensor<string, []>("current_value_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(271740544)))];
+            tensor<fp16, [768]> layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(272920256)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_21_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_bias_to_fp16, dilations = var_2305, groups = var_2255, pad = current_value_21_pad_0, pad_type = current_value_21_pad_type_0, strides = var_2303, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = obj_141_cast_fp16)[name = tensor<string, []>("current_value_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2312_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_2312_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2314_cast_fp16 = mul(x = var_63_cast_fp16_10, y = var_161_cast_fp16)[name = tensor<string, []>("op_2314_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_41_cast_fp16 = add(x = var_2312_cast_fp16, y = var_2314_cast_fp16)[name = tensor<string, []>("key_41_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2316_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_2316_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2318_cast_fp16 = mul(x = var_78_cast_fp16_10, y = var_161_cast_fp16)[name = tensor<string, []>("op_2318_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_41_cast_fp16 = add(x = var_2316_cast_fp16, y = var_2318_cast_fp16)[name = tensor<string, []>("value_41_cast_fp16")];
+            tensor<int32, [4]> var_2321 = const()[name = tensor<string, []>("op_2321"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_2322_cast_fp16 = reshape(shape = var_2321, x = query_41_cast_fp16)[name = tensor<string, []>("op_2322_cast_fp16")];
+            tensor<fp16, []> var_2323_to_fp16 = const()[name = tensor<string, []>("op_2323_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2324_cast_fp16 = mul(x = var_2322_cast_fp16, y = var_2323_to_fp16)[name = tensor<string, []>("op_2324_cast_fp16")];
+            tensor<int32, [4]> var_2325 = const()[name = tensor<string, []>("op_2325"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_2326_cast_fp16 = reshape(shape = var_2325, x = key_41_cast_fp16)[name = tensor<string, []>("op_2326_cast_fp16")];
+            tensor<bool, []> mh_w_61_transpose_x_0 = const()[name = tensor<string, []>("mh_w_61_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_61_transpose_y_0 = const()[name = tensor<string, []>("mh_w_61_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_2324_cast_fp16, y = var_2326_cast_fp16)[name = tensor<string, []>("mh_w_61_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_63_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_2334_cast_fp16 = softmax(axis = var_2248, x = mh_w_63_cast_fp16)[name = tensor<string, []>("op_2334_cast_fp16")];
+            tensor<int32, [4]> var_2335 = const()[name = tensor<string, []>("op_2335"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_2336_cast_fp16 = reshape(shape = var_2335, x = value_41_cast_fp16)[name = tensor<string, []>("op_2336_cast_fp16")];
+            tensor<bool, []> attn_41_transpose_x_0 = const()[name = tensor<string, []>("attn_41_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_41_transpose_y_0 = const()[name = tensor<string, []>("attn_41_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_2336_cast_fp16, y = var_2334_cast_fp16)[name = tensor<string, []>("attn_41_cast_fp16")];
+            tensor<int32, [4]> var_2339 = const()[name = tensor<string, []>("op_2339"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_101_cast_fp16 = reshape(shape = var_2339, x = attn_41_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<int32, [2]> var_2343 = const()[name = tensor<string, []>("op_2343"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2345 = const()[name = tensor<string, []>("op_2345"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_147_pad_type_0 = const()[name = tensor<string, []>("obj_147_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_147_pad_0 = const()[name = tensor<string, []>("obj_147_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(272921856)))];
+            tensor<fp16, [768]> layers_10_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(274101568)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_147_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_bias_to_fp16, dilations = var_2345, groups = var_2255, pad = obj_147_pad_0, pad_type = obj_147_pad_type_0, strides = var_2343, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("obj_147_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_147_cast_fp16)[name = tensor<string, []>("inputs_63_cast_fp16")];
+            tensor<int32, [1]> var_2355 = const()[name = tensor<string, []>("op_2355"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_63_cast_fp16 = reduce_mean(axes = var_2355, keep_dims = var_2256, x = inputs_63_cast_fp16)[name = tensor<string, []>("channels_mean_63_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_63_cast_fp16 = sub(x = inputs_63_cast_fp16, y = channels_mean_63_cast_fp16)[name = tensor<string, []>("zero_mean_63_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_63_cast_fp16 = mul(x = zero_mean_63_cast_fp16, y = zero_mean_63_cast_fp16)[name = tensor<string, []>("zero_mean_sq_63_cast_fp16")];
+            tensor<int32, [1]> var_2359 = const()[name = tensor<string, []>("op_2359"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2360_cast_fp16 = reduce_mean(axes = var_2359, keep_dims = var_2256, x = zero_mean_sq_63_cast_fp16)[name = tensor<string, []>("op_2360_cast_fp16")];
+            tensor<fp16, []> var_2361_to_fp16 = const()[name = tensor<string, []>("op_2361_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2362_cast_fp16 = add(x = var_2360_cast_fp16, y = var_2361_to_fp16)[name = tensor<string, []>("op_2362_cast_fp16")];
+            tensor<fp16, []> denom_63_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_63_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_63_cast_fp16 = rsqrt(epsilon = denom_63_epsilon_0_to_fp16, x = var_2362_cast_fp16)[name = tensor<string, []>("denom_63_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_63_cast_fp16 = mul(x = zero_mean_63_cast_fp16, y = denom_63_cast_fp16)[name = tensor<string, []>("out_63_cast_fp16")];
+            tensor<fp16, [768]> obj_149_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_149_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(274103168)))];
+            tensor<fp16, [768]> obj_149_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_149_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(274104768)))];
+            tensor<fp16, []> obj_149_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_149_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_149_cast_fp16 = batch_norm(beta = obj_149_beta_0_to_fp16, epsilon = obj_149_epsilon_0_to_fp16, gamma = obj_149_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor<string, []>("obj_149_cast_fp16")];
+            tensor<int32, [2]> var_2377 = const()[name = tensor<string, []>("op_2377"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2379 = const()[name = tensor<string, []>("op_2379"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_43_pad_type_0 = const()[name = tensor<string, []>("query_43_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_43_pad_0 = const()[name = tensor<string, []>("query_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(274106368)))];
+            tensor<fp16, [768]> layers_10_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(275286080)))];
+            tensor<fp16, [1, 768, 1, 1]> query_43_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_bias_to_fp16, dilations = var_2379, groups = var_2255, pad = query_43_pad_0, pad_type = query_43_pad_type_0, strides = var_2377, weight = layers_10_encoder_attn_q_proj_weight_to_fp16, x = obj_149_cast_fp16)[name = tensor<string, []>("query_43_cast_fp16")];
+            tensor<int32, [2]> var_2383 = const()[name = tensor<string, []>("op_2383"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2385 = const()[name = tensor<string, []>("op_2385"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_43_pad_type_0 = const()[name = tensor<string, []>("key_43_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_43_pad_0 = const()[name = tensor<string, []>("key_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(275287680)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_43_cast_fp16 = conv(dilations = var_2385, groups = var_2255, pad = key_43_pad_0, pad_type = key_43_pad_type_0, strides = var_2383, weight = layers_10_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_43_cast_fp16")];
+            tensor<int32, [2]> var_2390 = const()[name = tensor<string, []>("op_2390"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2392 = const()[name = tensor<string, []>("op_2392"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_43_pad_type_0 = const()[name = tensor<string, []>("value_43_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_43_pad_0 = const()[name = tensor<string, []>("value_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(276467392)))];
+            tensor<fp16, [768]> layers_10_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(277647104)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_43_cast_fp16 = conv(bias = layers_10_encoder_attn_v_proj_bias_to_fp16, dilations = var_2392, groups = var_2255, pad = value_43_pad_0, pad_type = value_43_pad_type_0, strides = var_2390, weight = layers_10_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_43_cast_fp16")];
+            tensor<int32, [4]> var_2396 = const()[name = tensor<string, []>("op_2396"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_2397_cast_fp16 = reshape(shape = var_2396, x = query_43_cast_fp16)[name = tensor<string, []>("op_2397_cast_fp16")];
+            tensor<fp16, []> var_2398_to_fp16 = const()[name = tensor<string, []>("op_2398_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2399_cast_fp16 = mul(x = var_2397_cast_fp16, y = var_2398_to_fp16)[name = tensor<string, []>("op_2399_cast_fp16")];
+            tensor<int32, [4]> var_2400 = const()[name = tensor<string, []>("op_2400"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2401_cast_fp16 = reshape(shape = var_2400, x = key_43_cast_fp16)[name = tensor<string, []>("op_2401_cast_fp16")];
+            tensor<bool, []> mh_w_65_transpose_x_0 = const()[name = tensor<string, []>("mh_w_65_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_65_transpose_y_0 = const()[name = tensor<string, []>("mh_w_65_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_2399_cast_fp16, y = var_2401_cast_fp16)[name = tensor<string, []>("mh_w_65_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_153_cast_fp16 = softmax(axis = var_2248, x = mh_w_65_cast_fp16)[name = tensor<string, []>("obj_153_cast_fp16")];
+            tensor<int32, [4]> var_2405 = const()[name = tensor<string, []>("op_2405"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2406_cast_fp16 = reshape(shape = var_2405, x = value_43_cast_fp16)[name = tensor<string, []>("op_2406_cast_fp16")];
+            tensor<bool, []> attn_43_transpose_x_0 = const()[name = tensor<string, []>("attn_43_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_43_transpose_y_0 = const()[name = tensor<string, []>("attn_43_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_2406_cast_fp16, y = obj_153_cast_fp16)[name = tensor<string, []>("attn_43_cast_fp16")];
+            tensor<int32, [4]> var_2409 = const()[name = tensor<string, []>("op_2409"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_103_cast_fp16 = reshape(shape = var_2409, x = attn_43_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<int32, [2]> var_2413 = const()[name = tensor<string, []>("op_2413"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2415 = const()[name = tensor<string, []>("op_2415"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_151_pad_type_0 = const()[name = tensor<string, []>("obj_151_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_151_pad_0 = const()[name = tensor<string, []>("obj_151_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(277648704)))];
+            tensor<fp16, [768]> layers_10_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(278828416)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_151_cast_fp16 = conv(bias = layers_10_encoder_attn_o_proj_bias_to_fp16, dilations = var_2415, groups = var_2255, pad = obj_151_pad_0, pad_type = obj_151_pad_type_0, strides = var_2413, weight = layers_10_encoder_attn_o_proj_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("obj_151_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_151_cast_fp16)[name = tensor<string, []>("inputs_65_cast_fp16")];
+            tensor<int32, [1]> var_2424 = const()[name = tensor<string, []>("op_2424"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_65_cast_fp16 = reduce_mean(axes = var_2424, keep_dims = var_2256, x = inputs_65_cast_fp16)[name = tensor<string, []>("channels_mean_65_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_65_cast_fp16 = sub(x = inputs_65_cast_fp16, y = channels_mean_65_cast_fp16)[name = tensor<string, []>("zero_mean_65_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_65_cast_fp16 = mul(x = zero_mean_65_cast_fp16, y = zero_mean_65_cast_fp16)[name = tensor<string, []>("zero_mean_sq_65_cast_fp16")];
+            tensor<int32, [1]> var_2428 = const()[name = tensor<string, []>("op_2428"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2429_cast_fp16 = reduce_mean(axes = var_2428, keep_dims = var_2256, x = zero_mean_sq_65_cast_fp16)[name = tensor<string, []>("op_2429_cast_fp16")];
+            tensor<fp16, []> var_2430_to_fp16 = const()[name = tensor<string, []>("op_2430_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2431_cast_fp16 = add(x = var_2429_cast_fp16, y = var_2430_to_fp16)[name = tensor<string, []>("op_2431_cast_fp16")];
+            tensor<fp16, []> denom_65_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_65_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_65_cast_fp16 = rsqrt(epsilon = denom_65_epsilon_0_to_fp16, x = var_2431_cast_fp16)[name = tensor<string, []>("denom_65_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_65_cast_fp16 = mul(x = zero_mean_65_cast_fp16, y = denom_65_cast_fp16)[name = tensor<string, []>("out_65_cast_fp16")];
+            tensor<fp16, [768]> input_105_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_105_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(278830016)))];
+            tensor<fp16, [768]> input_105_beta_0_to_fp16 = const()[name = tensor<string, []>("input_105_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(278831616)))];
+            tensor<fp16, []> input_105_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_105_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<int32, [2]> var_2442 = const()[name = tensor<string, []>("op_2442"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2444 = const()[name = tensor<string, []>("op_2444"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_107_pad_type_0 = const()[name = tensor<string, []>("input_107_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_107_pad_0 = const()[name = tensor<string, []>("input_107_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(278833216)))];
+            tensor<fp16, [3072]> layers_10_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(283551872)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_107_cast_fp16 = conv(bias = layers_10_fc1_bias_to_fp16, dilations = var_2444, groups = var_2255, pad = input_107_pad_0, pad_type = input_107_pad_type_0, strides = var_2442, weight = layers_10_fc1_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<string, []> input_109_mode_0 = const()[name = tensor<string, []>("input_109_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<int32, [2]> var_2450 = const()[name = tensor<string, []>("op_2450"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2452 = const()[name = tensor<string, []>("op_2452"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_23_pad_type_0 = const()[name = tensor<string, []>("hidden_states_23_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_23_pad_0 = const()[name = tensor<string, []>("hidden_states_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(283558080)))];
+            tensor<fp16, [768]> layers_10_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_10_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(288276736)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_23_cast_fp16 = conv(bias = layers_10_fc2_bias_to_fp16, dilations = var_2452, groups = var_2255, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = var_2450, weight = layers_10_fc2_weight_to_fp16, x = input_109_cast_fp16)[name = tensor<string, []>("hidden_states_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor<string, []>("inputs_67_cast_fp16")];
+            tensor<int32, []> var_2466 = const()[name = tensor<string, []>("op_2466"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_2473 = const()[name = tensor<string, []>("op_2473"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_2474 = const()[name = tensor<string, []>("op_2474"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_2486 = const()[name = tensor<string, []>("op_2486"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_67_cast_fp16 = reduce_mean(axes = var_2486, keep_dims = var_2474, x = inputs_67_cast_fp16)[name = tensor<string, []>("channels_mean_67_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_67_cast_fp16 = sub(x = inputs_67_cast_fp16, y = channels_mean_67_cast_fp16)[name = tensor<string, []>("zero_mean_67_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_67_cast_fp16 = mul(x = zero_mean_67_cast_fp16, y = zero_mean_67_cast_fp16)[name = tensor<string, []>("zero_mean_sq_67_cast_fp16")];
+            tensor<int32, [1]> var_2490 = const()[name = tensor<string, []>("op_2490"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2491_cast_fp16 = reduce_mean(axes = var_2490, keep_dims = var_2474, x = zero_mean_sq_67_cast_fp16)[name = tensor<string, []>("op_2491_cast_fp16")];
+            tensor<fp16, []> var_2492_to_fp16 = const()[name = tensor<string, []>("op_2492_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2493_cast_fp16 = add(x = var_2491_cast_fp16, y = var_2492_to_fp16)[name = tensor<string, []>("op_2493_cast_fp16")];
+            tensor<fp16, []> denom_67_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_67_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_67_cast_fp16 = rsqrt(epsilon = denom_67_epsilon_0_to_fp16, x = var_2493_cast_fp16)[name = tensor<string, []>("denom_67_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_67_cast_fp16 = mul(x = zero_mean_67_cast_fp16, y = denom_67_cast_fp16)[name = tensor<string, []>("out_67_cast_fp16")];
+            tensor<fp16, [768]> obj_155_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_155_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(288278336)))];
+            tensor<fp16, [768]> obj_155_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_155_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(288279936)))];
+            tensor<fp16, []> obj_155_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_155_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_155_cast_fp16 = batch_norm(beta = obj_155_beta_0_to_fp16, epsilon = obj_155_epsilon_0_to_fp16, gamma = obj_155_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor<string, []>("obj_155_cast_fp16")];
+            tensor<int32, [2]> var_2508 = const()[name = tensor<string, []>("op_2508"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2510 = const()[name = tensor<string, []>("op_2510"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_45_pad_type_0 = const()[name = tensor<string, []>("query_45_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_45_pad_0 = const()[name = tensor<string, []>("query_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(288281536)))];
+            tensor<fp16, [768]> layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289461248)))];
+            tensor<fp16, [1, 768, 1, 1]> query_45_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_bias_to_fp16, dilations = var_2510, groups = var_2473, pad = query_45_pad_0, pad_type = query_45_pad_type_0, strides = var_2508, weight = layers_11_self_attn_q_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor<string, []>("query_45_cast_fp16")];
+            tensor<int32, [2]> var_2514 = const()[name = tensor<string, []>("op_2514"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2516 = const()[name = tensor<string, []>("op_2516"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_pad_type_0 = const()[name = tensor<string, []>("current_key_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_pad_0 = const()[name = tensor<string, []>("current_key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289462848)))];
+            tensor<fp16, [1, 768, 1, 1]> current_key_cast_fp16 = conv(dilations = var_2516, groups = var_2473, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = var_2514, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor<string, []>("current_key_cast_fp16")];
+            tensor<int32, [2]> var_2521 = const()[name = tensor<string, []>("op_2521"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2523 = const()[name = tensor<string, []>("op_2523"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_pad_type_0 = const()[name = tensor<string, []>("current_value_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_pad_0 = const()[name = tensor<string, []>("current_value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(290642560)))];
+            tensor<fp16, [768]> layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(291822272)))];
+            tensor<fp16, [1, 768, 1, 1]> current_value_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_bias_to_fp16, dilations = var_2523, groups = var_2473, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = var_2521, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = obj_155_cast_fp16)[name = tensor<string, []>("current_value_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2530_cast_fp16 = mul(x = current_key_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_2530_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2532_cast_fp16 = mul(x = var_63_cast_fp16_11, y = var_161_cast_fp16)[name = tensor<string, []>("op_2532_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> key_45_cast_fp16 = add(x = var_2530_cast_fp16, y = var_2532_cast_fp16)[name = tensor<string, []>("key_45_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2534_cast_fp16 = mul(x = current_value_cast_fp16, y = var_158_cast_fp16)[name = tensor<string, []>("op_2534_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> var_2536_cast_fp16 = mul(x = var_78_cast_fp16_11, y = var_161_cast_fp16)[name = tensor<string, []>("op_2536_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 224]> value_45_cast_fp16 = add(x = var_2534_cast_fp16, y = var_2536_cast_fp16)[name = tensor<string, []>("value_45_cast_fp16")];
+            tensor<int32, [4]> var_2539 = const()[name = tensor<string, []>("op_2539"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_2540_cast_fp16 = reshape(shape = var_2539, x = query_45_cast_fp16)[name = tensor<string, []>("op_2540_cast_fp16")];
+            tensor<fp16, []> var_2541_to_fp16 = const()[name = tensor<string, []>("op_2541_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2542_cast_fp16 = mul(x = var_2540_cast_fp16, y = var_2541_to_fp16)[name = tensor<string, []>("op_2542_cast_fp16")];
+            tensor<int32, [4]> var_2543 = const()[name = tensor<string, []>("op_2543"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_2544_cast_fp16 = reshape(shape = var_2543, x = key_45_cast_fp16)[name = tensor<string, []>("op_2544_cast_fp16")];
+            tensor<bool, []> mh_w_67_transpose_x_0 = const()[name = tensor<string, []>("mh_w_67_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_67_transpose_y_0 = const()[name = tensor<string, []>("mh_w_67_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_67_cast_fp16 = matmul(transpose_x = mh_w_67_transpose_x_0, transpose_y = mh_w_67_transpose_y_0, x = var_2542_cast_fp16, y = var_2544_cast_fp16)[name = tensor<string, []>("mh_w_67_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> mh_w_69_cast_fp16 = add(x = mh_w_67_cast_fp16, y = var_179_cast_fp16)[name = tensor<string, []>("mh_w_69_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 224]> var_2552_cast_fp16 = softmax(axis = var_2466, x = mh_w_69_cast_fp16)[name = tensor<string, []>("op_2552_cast_fp16")];
+            tensor<int32, [4]> var_2553 = const()[name = tensor<string, []>("op_2553"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 224]> var_2554_cast_fp16 = reshape(shape = var_2553, x = value_45_cast_fp16)[name = tensor<string, []>("op_2554_cast_fp16")];
+            tensor<bool, []> attn_45_transpose_x_0 = const()[name = tensor<string, []>("attn_45_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_45_transpose_y_0 = const()[name = tensor<string, []>("attn_45_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_2554_cast_fp16, y = var_2552_cast_fp16)[name = tensor<string, []>("attn_45_cast_fp16")];
+            tensor<int32, [4]> var_2557 = const()[name = tensor<string, []>("op_2557"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_111_cast_fp16 = reshape(shape = var_2557, x = attn_45_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<int32, [2]> var_2561 = const()[name = tensor<string, []>("op_2561"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2563 = const()[name = tensor<string, []>("op_2563"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_161_pad_type_0 = const()[name = tensor<string, []>("obj_161_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_161_pad_0 = const()[name = tensor<string, []>("obj_161_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(291823872)))];
+            tensor<fp16, [768]> layers_11_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293003584)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_161_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_bias_to_fp16, dilations = var_2563, groups = var_2473, pad = obj_161_pad_0, pad_type = obj_161_pad_type_0, strides = var_2561, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("obj_161_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_161_cast_fp16)[name = tensor<string, []>("inputs_69_cast_fp16")];
+            tensor<int32, [1]> var_2573 = const()[name = tensor<string, []>("op_2573"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_69_cast_fp16 = reduce_mean(axes = var_2573, keep_dims = var_2474, x = inputs_69_cast_fp16)[name = tensor<string, []>("channels_mean_69_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_69_cast_fp16 = sub(x = inputs_69_cast_fp16, y = channels_mean_69_cast_fp16)[name = tensor<string, []>("zero_mean_69_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_69_cast_fp16 = mul(x = zero_mean_69_cast_fp16, y = zero_mean_69_cast_fp16)[name = tensor<string, []>("zero_mean_sq_69_cast_fp16")];
+            tensor<int32, [1]> var_2577 = const()[name = tensor<string, []>("op_2577"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2578_cast_fp16 = reduce_mean(axes = var_2577, keep_dims = var_2474, x = zero_mean_sq_69_cast_fp16)[name = tensor<string, []>("op_2578_cast_fp16")];
+            tensor<fp16, []> var_2579_to_fp16 = const()[name = tensor<string, []>("op_2579_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2580_cast_fp16 = add(x = var_2578_cast_fp16, y = var_2579_to_fp16)[name = tensor<string, []>("op_2580_cast_fp16")];
+            tensor<fp16, []> denom_69_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_69_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_69_cast_fp16 = rsqrt(epsilon = denom_69_epsilon_0_to_fp16, x = var_2580_cast_fp16)[name = tensor<string, []>("denom_69_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_69_cast_fp16 = mul(x = zero_mean_69_cast_fp16, y = denom_69_cast_fp16)[name = tensor<string, []>("out_69_cast_fp16")];
+            tensor<fp16, [768]> obj_163_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_163_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293005184)))];
+            tensor<fp16, [768]> obj_163_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_163_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293006784)))];
+            tensor<fp16, []> obj_163_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_163_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_163_cast_fp16 = batch_norm(beta = obj_163_beta_0_to_fp16, epsilon = obj_163_epsilon_0_to_fp16, gamma = obj_163_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor<string, []>("obj_163_cast_fp16")];
+            tensor<int32, [2]> var_2595 = const()[name = tensor<string, []>("op_2595"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2597 = const()[name = tensor<string, []>("op_2597"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293008384)))];
+            tensor<fp16, [768]> layers_11_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(294188096)))];
+            tensor<fp16, [1, 768, 1, 1]> query_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_bias_to_fp16, dilations = var_2597, groups = var_2473, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_2595, weight = layers_11_encoder_attn_q_proj_weight_to_fp16, x = obj_163_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
+            tensor<int32, [2]> var_2601 = const()[name = tensor<string, []>("op_2601"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2603 = const()[name = tensor<string, []>("op_2603"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_pad_0 = const()[name = tensor<string, []>("key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(294189696)))];
+            tensor<fp16, [1, 768, 1, 1500]> key_cast_fp16 = conv(dilations = var_2603, groups = var_2473, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_2601, weight = layers_11_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_cast_fp16")];
+            tensor<int32, [2]> var_2608 = const()[name = tensor<string, []>("op_2608"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2610 = const()[name = tensor<string, []>("op_2610"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_pad_type_0 = const()[name = tensor<string, []>("value_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_pad_0 = const()[name = tensor<string, []>("value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295369408)))];
+            tensor<fp16, [768]> layers_11_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(296549120)))];
+            tensor<fp16, [1, 768, 1, 1500]> value_cast_fp16 = conv(bias = layers_11_encoder_attn_v_proj_bias_to_fp16, dilations = var_2610, groups = var_2473, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_2608, weight = layers_11_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_cast_fp16")];
+            tensor<int32, [4]> var_2614 = const()[name = tensor<string, []>("op_2614"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> var_2615_cast_fp16 = reshape(shape = var_2614, x = query_cast_fp16)[name = tensor<string, []>("op_2615_cast_fp16")];
+            tensor<fp16, []> var_2616_to_fp16 = const()[name = tensor<string, []>("op_2616_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2617_cast_fp16 = mul(x = var_2615_cast_fp16, y = var_2616_to_fp16)[name = tensor<string, []>("op_2617_cast_fp16")];
+            tensor<int32, [4]> var_2618 = const()[name = tensor<string, []>("op_2618"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2619_cast_fp16 = reshape(shape = var_2618, x = key_cast_fp16)[name = tensor<string, []>("op_2619_cast_fp16")];
+            tensor<bool, []> mh_w_transpose_x_0 = const()[name = tensor<string, []>("mh_w_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_transpose_y_0 = const()[name = tensor<string, []>("mh_w_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 1, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2617_cast_fp16, y = var_2619_cast_fp16)[name = tensor<string, []>("mh_w_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1500]> obj_167_cast_fp16 = softmax(axis = var_2466, x = mh_w_cast_fp16)[name = tensor<string, []>("obj_167_cast_fp16")];
+            tensor<int32, [4]> var_2623 = const()[name = tensor<string, []>("op_2623"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2624_cast_fp16 = reshape(shape = var_2623, x = value_cast_fp16)[name = tensor<string, []>("op_2624_cast_fp16")];
+            tensor<bool, []> attn_transpose_x_0 = const()[name = tensor<string, []>("attn_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_transpose_y_0 = const()[name = tensor<string, []>("attn_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2624_cast_fp16, y = obj_167_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
+            tensor<int32, [4]> var_2627 = const()[name = tensor<string, []>("op_2627"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_113_cast_fp16 = reshape(shape = var_2627, x = attn_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<int32, [2]> var_2631 = const()[name = tensor<string, []>("op_2631"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2633 = const()[name = tensor<string, []>("op_2633"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_165_pad_type_0 = const()[name = tensor<string, []>("obj_165_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_165_pad_0 = const()[name = tensor<string, []>("obj_165_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(296550720)))];
+            tensor<fp16, [768]> layers_11_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(297730432)))];
+            tensor<fp16, [1, 768, 1, 1]> obj_165_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_bias_to_fp16, dilations = var_2633, groups = var_2473, pad = obj_165_pad_0, pad_type = obj_165_pad_type_0, strides = var_2631, weight = layers_11_encoder_attn_o_proj_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("obj_165_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_165_cast_fp16)[name = tensor<string, []>("inputs_71_cast_fp16")];
+            tensor<int32, [1]> var_2639 = const()[name = tensor<string, []>("op_2639"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_71_cast_fp16 = reduce_mean(axes = var_2639, keep_dims = var_2474, x = inputs_71_cast_fp16)[name = tensor<string, []>("channels_mean_71_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_71_cast_fp16 = sub(x = inputs_71_cast_fp16, y = channels_mean_71_cast_fp16)[name = tensor<string, []>("zero_mean_71_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_71_cast_fp16 = mul(x = zero_mean_71_cast_fp16, y = zero_mean_71_cast_fp16)[name = tensor<string, []>("zero_mean_sq_71_cast_fp16")];
+            tensor<int32, [1]> var_2643 = const()[name = tensor<string, []>("op_2643"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2644_cast_fp16 = reduce_mean(axes = var_2643, keep_dims = var_2474, x = zero_mean_sq_71_cast_fp16)[name = tensor<string, []>("op_2644_cast_fp16")];
+            tensor<fp16, []> var_2645_to_fp16 = const()[name = tensor<string, []>("op_2645_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2646_cast_fp16 = add(x = var_2644_cast_fp16, y = var_2645_to_fp16)[name = tensor<string, []>("op_2646_cast_fp16")];
+            tensor<fp16, []> denom_71_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_71_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_71_cast_fp16 = rsqrt(epsilon = denom_71_epsilon_0_to_fp16, x = var_2646_cast_fp16)[name = tensor<string, []>("denom_71_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_71_cast_fp16 = mul(x = zero_mean_71_cast_fp16, y = denom_71_cast_fp16)[name = tensor<string, []>("out_71_cast_fp16")];
+            tensor<fp16, [768]> input_115_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_115_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(297732032)))];
+            tensor<fp16, [768]> input_115_beta_0_to_fp16 = const()[name = tensor<string, []>("input_115_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(297733632)))];
+            tensor<fp16, []> input_115_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_115_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<int32, [2]> var_2657 = const()[name = tensor<string, []>("op_2657"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2659 = const()[name = tensor<string, []>("op_2659"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_117_pad_type_0 = const()[name = tensor<string, []>("input_117_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_117_pad_0 = const()[name = tensor<string, []>("input_117_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(297735232)))];
+            tensor<fp16, [3072]> layers_11_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302453888)))];
+            tensor<fp16, [1, 3072, 1, 1]> input_117_cast_fp16 = conv(bias = layers_11_fc1_bias_to_fp16, dilations = var_2659, groups = var_2473, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = var_2657, weight = layers_11_fc1_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_117_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [2]> var_2665 = const()[name = tensor<string, []>("op_2665"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2667 = const()[name = tensor<string, []>("op_2667"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_25_pad_type_0 = const()[name = tensor<string, []>("hidden_states_25_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_25_pad_0 = const()[name = tensor<string, []>("hidden_states_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302460096)))];
+            tensor<fp16, [768]> layers_11_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_11_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(307178752)))];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_25_cast_fp16 = conv(bias = layers_11_fc2_bias_to_fp16, dilations = var_2667, groups = var_2473, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = var_2665, weight = layers_11_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<bool, []> var_2677 = const()[name = tensor<string, []>("op_2677"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_2681 = const()[name = tensor<string, []>("op_2681"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_cast_fp16 = reduce_mean(axes = var_2681, keep_dims = var_2677, x = inputs_cast_fp16)[name = tensor<string, []>("channels_mean_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor<string, []>("zero_mean_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor<string, []>("zero_mean_sq_cast_fp16")];
+            tensor<int32, [1]> var_2685 = const()[name = tensor<string, []>("op_2685"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_2686_cast_fp16 = reduce_mean(axes = var_2685, keep_dims = var_2677, x = zero_mean_sq_cast_fp16)[name = tensor<string, []>("op_2686_cast_fp16")];
+            tensor<fp16, []> var_2687_to_fp16 = const()[name = tensor<string, []>("op_2687_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_2688_cast_fp16 = add(x = var_2686_cast_fp16, y = var_2687_to_fp16)[name = tensor<string, []>("op_2688_cast_fp16")];
+            tensor<fp16, []> denom_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_2688_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
+            tensor<fp16, [768]> hidden_states_gamma_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(307180352)))];
+            tensor<fp16, [768]> hidden_states_beta_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(307181952)))];
+            tensor<fp16, []> hidden_states_epsilon_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
+            tensor<int32, [1]> var_2698_axes_0 = const()[name = tensor<string, []>("op_2698_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1]> var_2698_cast_fp16 = squeeze(axes = var_2698_axes_0, x = hidden_states_cast_fp16)[name = tensor<string, []>("op_2698_cast_fp16")];
+            tensor<int32, [3]> var_2701_perm_0 = const()[name = tensor<string, []>("op_2701_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [51865]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(307183552)))];
+            tensor<fp16, [1, 1, 768]> transpose_0 = transpose(perm = var_2701_perm_0, x = var_2698_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp16, [1, 1, 51865]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = transpose_0)[name = tensor<string, []>("linear_0_cast_fp16")];
+            tensor<int32, []> var_2705 = const()[name = tensor<string, []>("op_2705"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_171_interleave_0 = const()[name = tensor<string, []>("obj_171_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 9216, 1, 1]> key_cache_updates = concat(axis = var_2705, interleave = obj_171_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_cast_fp16))[name = tensor<string, []>("obj_171_cast_fp16")];
+            tensor<int32, []> var_2708 = const()[name = tensor<string, []>("op_2708"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_173_interleave_0 = const()[name = tensor<string, []>("obj_173_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 9216, 1, 1]> value_cache_updates = concat(axis = var_2708, interleave = obj_173_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_cast_fp16))[name = tensor<string, []>("obj_173_cast_fp16")];
+            tensor<int32, [4]> var_2719_begin_0 = const()[name = tensor<string, []>("op_2719_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_2719_end_0 = const()[name = tensor<string, []>("op_2719_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1500])];
+            tensor<bool, [4]> var_2719_end_mask_0 = const()[name = tensor<string, []>("op_2719_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2719_cast_fp16 = slice_by_index(begin = var_2719_begin_0, end = var_2719_end_0, end_mask = var_2719_end_mask_0, x = obj_83_cast_fp16)[name = tensor<string, []>("op_2719_cast_fp16")];
+            tensor<int32, [4]> var_2722_begin_0 = const()[name = tensor<string, []>("op_2722_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2722_end_0 = const()[name = tensor<string, []>("op_2722_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2722_end_mask_0 = const()[name = tensor<string, []>("op_2722_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2722_squeeze_mask_0 = const()[name = tensor<string, []>("op_2722_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2722_cast_fp16 = slice_by_index(begin = var_2722_begin_0, end = var_2722_end_0, end_mask = var_2722_end_mask_0, squeeze_mask = var_2722_squeeze_mask_0, x = var_2719_cast_fp16)[name = tensor<string, []>("op_2722_cast_fp16")];
+            tensor<int32, [4]> var_2737_begin_0 = const()[name = tensor<string, []>("op_2737_begin_0"), val = tensor<int32, [4]>([0, 9, 0, 0])];
+            tensor<int32, [4]> var_2737_end_0 = const()[name = tensor<string, []>("op_2737_end_0"), val = tensor<int32, [4]>([1, 10, 1, 1500])];
+            tensor<bool, [4]> var_2737_end_mask_0 = const()[name = tensor<string, []>("op_2737_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2737_cast_fp16 = slice_by_index(begin = var_2737_begin_0, end = var_2737_end_0, end_mask = var_2737_end_mask_0, x = obj_83_cast_fp16)[name = tensor<string, []>("op_2737_cast_fp16")];
+            tensor<int32, [4]> var_2740_begin_0 = const()[name = tensor<string, []>("op_2740_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2740_end_0 = const()[name = tensor<string, []>("op_2740_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2740_end_mask_0 = const()[name = tensor<string, []>("op_2740_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2740_squeeze_mask_0 = const()[name = tensor<string, []>("op_2740_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2740_cast_fp16 = slice_by_index(begin = var_2740_begin_0, end = var_2740_end_0, end_mask = var_2740_end_mask_0, squeeze_mask = var_2740_squeeze_mask_0, x = var_2737_cast_fp16)[name = tensor<string, []>("op_2740_cast_fp16")];
+            tensor<int32, [4]> var_2755_begin_0 = const()[name = tensor<string, []>("op_2755_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2755_end_0 = const()[name = tensor<string, []>("op_2755_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2755_end_mask_0 = const()[name = tensor<string, []>("op_2755_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2755_cast_fp16 = slice_by_index(begin = var_2755_begin_0, end = var_2755_end_0, end_mask = var_2755_end_mask_0, x = obj_125_cast_fp16)[name = tensor<string, []>("op_2755_cast_fp16")];
+            tensor<int32, [4]> var_2758_begin_0 = const()[name = tensor<string, []>("op_2758_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2758_end_0 = const()[name = tensor<string, []>("op_2758_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2758_end_mask_0 = const()[name = tensor<string, []>("op_2758_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2758_squeeze_mask_0 = const()[name = tensor<string, []>("op_2758_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2758_cast_fp16 = slice_by_index(begin = var_2758_begin_0, end = var_2758_end_0, end_mask = var_2758_end_mask_0, squeeze_mask = var_2758_squeeze_mask_0, x = var_2755_cast_fp16)[name = tensor<string, []>("op_2758_cast_fp16")];
+            tensor<int32, [4]> var_2773_begin_0 = const()[name = tensor<string, []>("op_2773_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_2773_end_0 = const()[name = tensor<string, []>("op_2773_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1500])];
+            tensor<bool, [4]> var_2773_end_mask_0 = const()[name = tensor<string, []>("op_2773_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2773_cast_fp16 = slice_by_index(begin = var_2773_begin_0, end = var_2773_end_0, end_mask = var_2773_end_mask_0, x = obj_125_cast_fp16)[name = tensor<string, []>("op_2773_cast_fp16")];
+            tensor<int32, [4]> var_2776_begin_0 = const()[name = tensor<string, []>("op_2776_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2776_end_0 = const()[name = tensor<string, []>("op_2776_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2776_end_mask_0 = const()[name = tensor<string, []>("op_2776_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2776_squeeze_mask_0 = const()[name = tensor<string, []>("op_2776_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2776_cast_fp16 = slice_by_index(begin = var_2776_begin_0, end = var_2776_end_0, end_mask = var_2776_end_mask_0, squeeze_mask = var_2776_squeeze_mask_0, x = var_2773_cast_fp16)[name = tensor<string, []>("op_2776_cast_fp16")];
+            tensor<int32, [4]> var_2791_begin_0 = const()[name = tensor<string, []>("op_2791_begin_0"), val = tensor<int32, [4]>([0, 7, 0, 0])];
+            tensor<int32, [4]> var_2791_end_0 = const()[name = tensor<string, []>("op_2791_end_0"), val = tensor<int32, [4]>([1, 8, 1, 1500])];
+            tensor<bool, [4]> var_2791_end_mask_0 = const()[name = tensor<string, []>("op_2791_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2791_cast_fp16 = slice_by_index(begin = var_2791_begin_0, end = var_2791_end_0, end_mask = var_2791_end_mask_0, x = obj_125_cast_fp16)[name = tensor<string, []>("op_2791_cast_fp16")];
+            tensor<int32, [4]> var_2794_begin_0 = const()[name = tensor<string, []>("op_2794_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2794_end_0 = const()[name = tensor<string, []>("op_2794_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2794_end_mask_0 = const()[name = tensor<string, []>("op_2794_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2794_squeeze_mask_0 = const()[name = tensor<string, []>("op_2794_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2794_cast_fp16 = slice_by_index(begin = var_2794_begin_0, end = var_2794_end_0, end_mask = var_2794_end_mask_0, squeeze_mask = var_2794_squeeze_mask_0, x = var_2791_cast_fp16)[name = tensor<string, []>("op_2794_cast_fp16")];
+            tensor<int32, [4]> var_2809_begin_0 = const()[name = tensor<string, []>("op_2809_begin_0"), val = tensor<int32, [4]>([0, 8, 0, 0])];
+            tensor<int32, [4]> var_2809_end_0 = const()[name = tensor<string, []>("op_2809_end_0"), val = tensor<int32, [4]>([1, 9, 1, 1500])];
+            tensor<bool, [4]> var_2809_end_mask_0 = const()[name = tensor<string, []>("op_2809_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2809_cast_fp16 = slice_by_index(begin = var_2809_begin_0, end = var_2809_end_0, end_mask = var_2809_end_mask_0, x = obj_125_cast_fp16)[name = tensor<string, []>("op_2809_cast_fp16")];
+            tensor<int32, [4]> var_2812_begin_0 = const()[name = tensor<string, []>("op_2812_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2812_end_0 = const()[name = tensor<string, []>("op_2812_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2812_end_mask_0 = const()[name = tensor<string, []>("op_2812_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2812_squeeze_mask_0 = const()[name = tensor<string, []>("op_2812_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2812_cast_fp16 = slice_by_index(begin = var_2812_begin_0, end = var_2812_end_0, end_mask = var_2812_end_mask_0, squeeze_mask = var_2812_squeeze_mask_0, x = var_2809_cast_fp16)[name = tensor<string, []>("op_2812_cast_fp16")];
+            tensor<int32, [4]> var_2827_begin_0 = const()[name = tensor<string, []>("op_2827_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2827_end_0 = const()[name = tensor<string, []>("op_2827_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2827_end_mask_0 = const()[name = tensor<string, []>("op_2827_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2827_cast_fp16 = slice_by_index(begin = var_2827_begin_0, end = var_2827_end_0, end_mask = var_2827_end_mask_0, x = obj_139_cast_fp16)[name = tensor<string, []>("op_2827_cast_fp16")];
+            tensor<int32, [4]> var_2830_begin_0 = const()[name = tensor<string, []>("op_2830_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2830_end_0 = const()[name = tensor<string, []>("op_2830_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2830_end_mask_0 = const()[name = tensor<string, []>("op_2830_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2830_squeeze_mask_0 = const()[name = tensor<string, []>("op_2830_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2830_cast_fp16 = slice_by_index(begin = var_2830_begin_0, end = var_2830_end_0, end_mask = var_2830_end_mask_0, squeeze_mask = var_2830_squeeze_mask_0, x = var_2827_cast_fp16)[name = tensor<string, []>("op_2830_cast_fp16")];
+            tensor<int32, [4]> var_2845_begin_0 = const()[name = tensor<string, []>("op_2845_begin_0"), val = tensor<int32, [4]>([0, 7, 0, 0])];
+            tensor<int32, [4]> var_2845_end_0 = const()[name = tensor<string, []>("op_2845_end_0"), val = tensor<int32, [4]>([1, 8, 1, 1500])];
+            tensor<bool, [4]> var_2845_end_mask_0 = const()[name = tensor<string, []>("op_2845_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2845_cast_fp16 = slice_by_index(begin = var_2845_begin_0, end = var_2845_end_0, end_mask = var_2845_end_mask_0, x = obj_139_cast_fp16)[name = tensor<string, []>("op_2845_cast_fp16")];
+            tensor<int32, [4]> var_2848_begin_0 = const()[name = tensor<string, []>("op_2848_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2848_end_0 = const()[name = tensor<string, []>("op_2848_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2848_end_mask_0 = const()[name = tensor<string, []>("op_2848_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2848_squeeze_mask_0 = const()[name = tensor<string, []>("op_2848_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2848_cast_fp16 = slice_by_index(begin = var_2848_begin_0, end = var_2848_end_0, end_mask = var_2848_end_mask_0, squeeze_mask = var_2848_squeeze_mask_0, x = var_2845_cast_fp16)[name = tensor<string, []>("op_2848_cast_fp16")];
+            tensor<int32, [4]> var_2863_begin_0 = const()[name = tensor<string, []>("op_2863_begin_0"), val = tensor<int32, [4]>([0, 9, 0, 0])];
+            tensor<int32, [4]> var_2863_end_0 = const()[name = tensor<string, []>("op_2863_end_0"), val = tensor<int32, [4]>([1, 10, 1, 1500])];
+            tensor<bool, [4]> var_2863_end_mask_0 = const()[name = tensor<string, []>("op_2863_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2863_cast_fp16 = slice_by_index(begin = var_2863_begin_0, end = var_2863_end_0, end_mask = var_2863_end_mask_0, x = obj_139_cast_fp16)[name = tensor<string, []>("op_2863_cast_fp16")];
+            tensor<int32, [4]> var_2866_begin_0 = const()[name = tensor<string, []>("op_2866_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2866_end_0 = const()[name = tensor<string, []>("op_2866_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2866_end_mask_0 = const()[name = tensor<string, []>("op_2866_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2866_squeeze_mask_0 = const()[name = tensor<string, []>("op_2866_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2866_cast_fp16 = slice_by_index(begin = var_2866_begin_0, end = var_2866_end_0, end_mask = var_2866_end_mask_0, squeeze_mask = var_2866_squeeze_mask_0, x = var_2863_cast_fp16)[name = tensor<string, []>("op_2866_cast_fp16")];
+            tensor<int32, [4]> var_2881_begin_0 = const()[name = tensor<string, []>("op_2881_begin_0"), val = tensor<int32, [4]>([0, 5, 0, 0])];
+            tensor<int32, [4]> var_2881_end_0 = const()[name = tensor<string, []>("op_2881_end_0"), val = tensor<int32, [4]>([1, 6, 1, 1500])];
+            tensor<bool, [4]> var_2881_end_mask_0 = const()[name = tensor<string, []>("op_2881_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2881_cast_fp16 = slice_by_index(begin = var_2881_begin_0, end = var_2881_end_0, end_mask = var_2881_end_mask_0, x = obj_153_cast_fp16)[name = tensor<string, []>("op_2881_cast_fp16")];
+            tensor<int32, [4]> var_2884_begin_0 = const()[name = tensor<string, []>("op_2884_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2884_end_0 = const()[name = tensor<string, []>("op_2884_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_2884_end_mask_0 = const()[name = tensor<string, []>("op_2884_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_2884_squeeze_mask_0 = const()[name = tensor<string, []>("op_2884_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_2884_cast_fp16 = slice_by_index(begin = var_2884_begin_0, end = var_2884_end_0, end_mask = var_2884_end_mask_0, squeeze_mask = var_2884_squeeze_mask_0, x = var_2881_cast_fp16)[name = tensor<string, []>("op_2884_cast_fp16")];
+            tensor<int32, []> var_2891 = const()[name = tensor<string, []>("op_2891"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_2892_interleave_0 = const()[name = tensor<string, []>("op_2892_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 10, 1500]> var_2892_cast_fp16 = concat(axis = var_2891, interleave = var_2892_interleave_0, values = (var_2722_cast_fp16, var_2740_cast_fp16, var_2758_cast_fp16, var_2776_cast_fp16, var_2794_cast_fp16, var_2812_cast_fp16, var_2830_cast_fp16, var_2848_cast_fp16, var_2866_cast_fp16, var_2884_cast_fp16))[name = tensor<string, []>("op_2892_cast_fp16")];
+            tensor<int32, [1]> var_2894 = const()[name = tensor<string, []>("op_2894"), val = tensor<int32, [1]>([1])];
+            tensor<bool, []> var_2895 = const()[name = tensor<string, []>("op_2895"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1500]> alignment_heads_weights = reduce_mean(axes = var_2894, keep_dims = var_2895, x = var_2892_cast_fp16)[name = tensor<string, []>("obj_cast_fp16")];
+        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
+}
\ No newline at end of file
diff --git a/openai_whisper-small/TextDecoder.mlmodelc/model.mlmodel b/openai_whisper-small/TextDecoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..3200504bad9d1d06bf4223e5c695bea0257696e5
--- /dev/null
+++ b/openai_whisper-small/TextDecoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea861c6dfdd866ed0f2e7fe0c3df7459daa44481cb25236e03698dd6d259391
+size 313629
diff --git a/openai_whisper-small/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-small/TextDecoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f06c9ac384fec32001d96a53bd48156581906005
--- /dev/null
+++ b/openai_whisper-small/TextDecoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfea8044a8f38e8d33f56585b1e75ce023d3845e2a945e20480bd7e16558016e
+size 307287346
diff --git a/openai_whisper-small/config.json b/openai_whisper-small/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9dee569cf0c20925208ec84fecbb95e873f8bf24
--- /dev/null
+++ b/openai_whisper-small/config.json
@@ -0,0 +1 @@
+{"_name_or_path": "openai/whisper-small", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "d_model": 768, "decoder_attention_heads": 12, "decoder_ffn_dim": 3072, "decoder_layerdrop": 0.0, "decoder_layers": 12, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 12, "encoder_ffn_dim": 3072, "encoder_layerdrop": 0.0, "encoder_layers": 12, "eos_token_id": 50257, "forced_decoder_ids": [[1, 50259], [2, 50359], [3, 50363]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": "whisper", "num_hidden_layers": 12, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50360, 50361, 50362], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51865}
\ No newline at end of file
diff --git a/openai_whisper-small/generation_config.json b/openai_whisper-small/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cdd26273f9cd1ab8ecda49f5b8c033134c61cb4a
--- /dev/null
+++ b/openai_whisper-small/generation_config.json
@@ -0,0 +1 @@
+{"alignment_heads": [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]], "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "decoder_start_token_id": 50258, "eos_token_id": 50257, "forced_decoder_ids": [[1, null], [2, 50359]], "is_multilingual": true, "lang_to_id": {"<|af|>": 50327, "<|am|>": 50334, "<|ar|>": 50272, "<|as|>": 50350, "<|az|>": 50304, "<|ba|>": 50355, "<|be|>": 50330, "<|bg|>": 50292, "<|bn|>": 50302, "<|bo|>": 50347, "<|br|>": 50309, "<|bs|>": 50315, "<|ca|>": 50270, "<|cs|>": 50283, "<|cy|>": 50297, "<|da|>": 50285, "<|de|>": 50261, "<|el|>": 50281, "<|en|>": 50259, "<|es|>": 50262, "<|et|>": 50307, "<|eu|>": 50310, "<|fa|>": 50300, "<|fi|>": 50277, "<|fo|>": 50338, "<|fr|>": 50265, "<|gl|>": 50319, "<|gu|>": 50333, "<|haw|>": 50352, "<|ha|>": 50354, "<|he|>": 50279, "<|hi|>": 50276, "<|hr|>": 50291, "<|ht|>": 50339, "<|hu|>": 50286, "<|hy|>": 50312, "<|id|>": 50275, "<|is|>": 50311, "<|it|>": 50274, "<|ja|>": 50266, "<|jw|>": 50356, "<|ka|>": 50329, "<|kk|>": 50316, "<|km|>": 50323, "<|kn|>": 50306, "<|ko|>": 50264, "<|la|>": 50294, "<|lb|>": 50345, "<|ln|>": 50353, "<|lo|>": 50336, "<|lt|>": 50293, "<|lv|>": 50301, "<|mg|>": 50349, "<|mi|>": 50295, "<|mk|>": 50308, "<|ml|>": 50296, "<|mn|>": 50314, "<|mr|>": 50320, "<|ms|>": 50282, "<|mt|>": 50343, "<|my|>": 50346, "<|ne|>": 50313, "<|nl|>": 50271, "<|nn|>": 50342, "<|no|>": 50288, "<|oc|>": 50328, "<|pa|>": 50321, "<|pl|>": 50269, "<|ps|>": 50340, "<|pt|>": 50267, "<|ro|>": 50284, "<|ru|>": 50263, "<|sa|>": 50344, "<|sd|>": 50332, "<|si|>": 50322, "<|sk|>": 50298, "<|sl|>": 50305, "<|sn|>": 50324, "<|so|>": 50326, "<|sq|>": 50317, "<|sr|>": 50303, "<|su|>": 50357, "<|sv|>": 50273, "<|sw|>": 50318, "<|ta|>": 50287, "<|te|>": 50299, "<|tg|>": 50331, "<|th|>": 50289, "<|tk|>": 50341, "<|tl|>": 50348, "<|tr|>": 50268, "<|tt|>": 50351, "<|uk|>": 50280, "<|ur|>": 50290, "<|uz|>": 50337, "<|vi|>": 50278, "<|yi|>": 50335, "<|yo|>": 50325, "<|zh|>": 50260}, "max_initial_timestamp_index": 50, "max_length": 448, "no_timestamps_token_id": 50363, "pad_token_id": 50257, "prev_sot_token_id": 50361, "return_timestamps": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], "task_to_id": {"transcribe": 50359, "translate": 50358}, "transformers_version": "4.31.0.dev0"}
\ No newline at end of file
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b4b0fd449ce60f8339a82b97494dc979da7d3d85
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaaaa6671a96a359a0bbd5e97885246dcc17f7435b6ffad8d871bb940964500b
+size 243
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..dbb87205e58944de5b37730821b9445de8108597
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:325b182d0a4266730a81795ae6b7a787b5111dd091500fc0c04dedf610015d46
+size 347
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..a3e5dfdd923730a266e009edfa610abfa701d090
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/metadata.json
@@ -0,0 +1,67 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 384 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 384, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 28,
+      "Ios16.add" : 9,
+      "Ios16.mul" : 96,
+      "SliceByIndex" : 168,
+      "Transpose" : 4,
+      "Ios16.batchNorm" : 9,
+      "Ios16.einsum" : 192,
+      "Ios16.gelu" : 6,
+      "Ios16.softmax" : 96,
+      "Ios16.layerNorm" : 9,
+      "Ios16.conv" : 26
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.0",
+      "com.github.apple.coremltools.source" : "torch==2.4.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "AudioEncoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlmodelc/model.mil b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..bb0db645154497f5df1961a01d0d4dab3af34f8a
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/model.mil
@@ -0,0 +1,1713 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios16>(tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features) {
+            tensor<string, []> var_50_pad_type_0 = const()[name = tensor<string, []>("op_50_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_50_pad_0 = const()[name = tensor<string, []>("op_50_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_50_strides_0 = const()[name = tensor<string, []>("op_50_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_50_dilations_0 = const()[name = tensor<string, []>("op_50_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_50_groups_0 = const()[name = tensor<string, []>("op_50_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 80, 1, 3]> var_25_to_fp16 = const()[name = tensor<string, []>("op_25_to_fp16"), val = tensor<fp16, [384, 80, 1, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [384]> var_31_to_fp16 = const()[name = tensor<string, []>("op_31_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184448)))];
+            tensor<fp16, [1, 384, 1, 3000]> var_50_cast_fp16 = conv(bias = var_31_to_fp16, dilations = var_50_dilations_0, groups = var_50_groups_0, pad = var_50_pad_0, pad_type = var_50_pad_type_0, strides = var_50_strides_0, weight = var_25_to_fp16, x = melspectrogram_features)[name = tensor<string, []>("op_50_cast_fp16")];
+            tensor<string, []> hidden_states_1_mode_0 = const()[name = tensor<string, []>("hidden_states_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 384, 1, 3000]> hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_50_cast_fp16)[name = tensor<string, []>("hidden_states_1_cast_fp16")];
+            tensor<string, []> var_90_pad_type_0 = const()[name = tensor<string, []>("op_90_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_90_pad_0 = const()[name = tensor<string, []>("op_90_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_90_strides_0 = const()[name = tensor<string, []>("op_90_strides_0"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_90_dilations_0 = const()[name = tensor<string, []>("op_90_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_90_groups_0 = const()[name = tensor<string, []>("op_90_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 3]> var_65_to_fp16 = const()[name = tensor<string, []>("op_65_to_fp16"), val = tensor<fp16, [384, 384, 1, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185280)))];
+            tensor<fp16, [384]> var_71_to_fp16 = const()[name = tensor<string, []>("op_71_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1070080)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_90_cast_fp16 = conv(bias = var_71_to_fp16, dilations = var_90_dilations_0, groups = var_90_groups_0, pad = var_90_pad_0, pad_type = var_90_pad_type_0, strides = var_90_strides_0, weight = var_65_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor<string, []>("op_90_cast_fp16")];
+            tensor<string, []> hidden_states_3_mode_0 = const()[name = tensor<string, []>("hidden_states_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_90_cast_fp16)[name = tensor<string, []>("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> var_108_to_fp16 = const()[name = tensor<string, []>("op_108_to_fp16"), val = tensor<fp16, [1, 384, 1, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1070912)))];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_108_to_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_118 = const()[name = tensor<string, []>("op_118"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_129 = const()[name = tensor<string, []>("op_129"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> out_1_axes_0 = const()[name = tensor<string, []>("out_1_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_146_to_fp16 = const()[name = tensor<string, []>("op_146_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_146_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
+            tensor<fp16, [384]> obj_1_mean_0_to_fp16 = const()[name = tensor<string, []>("obj_1_mean_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2222976)))];
+            tensor<fp16, [384]> obj_1_variance_0_to_fp16 = const()[name = tensor<string, []>("obj_1_variance_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2223808)))];
+            tensor<fp16, [384]> obj_1_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2224640)))];
+            tensor<fp16, [384]> obj_1_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_1_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2225472)))];
+            tensor<fp16, []> obj_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor<string, []>("obj_1_cast_fp16")];
+            tensor<string, []> query_1_pad_type_0 = const()[name = tensor<string, []>("query_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_1_strides_0 = const()[name = tensor<string, []>("query_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_1_pad_0 = const()[name = tensor<string, []>("query_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_1_dilations_0 = const()[name = tensor<string, []>("query_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_1_groups_0 = const()[name = tensor<string, []>("query_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2226304)))];
+            tensor<fp16, [384]> layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2521280)))];
+            tensor<fp16, [1, 384, 1, 1500]> query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("query_1_cast_fp16")];
+            tensor<string, []> key_1_pad_type_0 = const()[name = tensor<string, []>("key_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> key_1_strides_0 = const()[name = tensor<string, []>("key_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> key_1_pad_0 = const()[name = tensor<string, []>("key_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> key_1_dilations_0 = const()[name = tensor<string, []>("key_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> key_1_groups_0 = const()[name = tensor<string, []>("key_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2522112)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_1_cast_fp16 = conv(dilations = key_1_dilations_0, groups = key_1_groups_0, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("key_1_cast_fp16")];
+            tensor<string, []> value_1_pad_type_0 = const()[name = tensor<string, []>("value_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> value_1_strides_0 = const()[name = tensor<string, []>("value_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> value_1_pad_0 = const()[name = tensor<string, []>("value_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> value_1_dilations_0 = const()[name = tensor<string, []>("value_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> value_1_groups_0 = const()[name = tensor<string, []>("value_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2817088)))];
+            tensor<fp16, [384]> layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3112064)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = value_1_dilations_0, groups = value_1_groups_0, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("value_1_cast_fp16")];
+            tensor<int32, [4]> var_184_begin_0 = const()[name = tensor<string, []>("op_184_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_184_end_0 = const()[name = tensor<string, []>("op_184_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_184_end_mask_0 = const()[name = tensor<string, []>("op_184_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_184_cast_fp16 = slice_by_index(begin = var_184_begin_0, end = var_184_end_0, end_mask = var_184_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_184_cast_fp16")];
+            tensor<int32, [4]> var_188_begin_0 = const()[name = tensor<string, []>("op_188_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_188_end_0 = const()[name = tensor<string, []>("op_188_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_188_end_mask_0 = const()[name = tensor<string, []>("op_188_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_188_cast_fp16 = slice_by_index(begin = var_188_begin_0, end = var_188_end_0, end_mask = var_188_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_188_cast_fp16")];
+            tensor<int32, [4]> var_192_begin_0 = const()[name = tensor<string, []>("op_192_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_192_end_0 = const()[name = tensor<string, []>("op_192_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_192_end_mask_0 = const()[name = tensor<string, []>("op_192_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_192_cast_fp16 = slice_by_index(begin = var_192_begin_0, end = var_192_end_0, end_mask = var_192_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_192_cast_fp16")];
+            tensor<int32, [4]> var_196_begin_0 = const()[name = tensor<string, []>("op_196_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_196_end_0 = const()[name = tensor<string, []>("op_196_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_196_end_mask_0 = const()[name = tensor<string, []>("op_196_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_196_cast_fp16 = slice_by_index(begin = var_196_begin_0, end = var_196_end_0, end_mask = var_196_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_196_cast_fp16")];
+            tensor<int32, [4]> var_200_begin_0 = const()[name = tensor<string, []>("op_200_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_200_end_0 = const()[name = tensor<string, []>("op_200_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_200_end_mask_0 = const()[name = tensor<string, []>("op_200_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_200_cast_fp16 = slice_by_index(begin = var_200_begin_0, end = var_200_end_0, end_mask = var_200_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_200_cast_fp16")];
+            tensor<int32, [4]> var_204_begin_0 = const()[name = tensor<string, []>("op_204_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_204_end_0 = const()[name = tensor<string, []>("op_204_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_204_end_mask_0 = const()[name = tensor<string, []>("op_204_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_204_cast_fp16 = slice_by_index(begin = var_204_begin_0, end = var_204_end_0, end_mask = var_204_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_204_cast_fp16")];
+            tensor<int32, [4]> var_213_begin_0 = const()[name = tensor<string, []>("op_213_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_213_end_0 = const()[name = tensor<string, []>("op_213_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_213_end_mask_0 = const()[name = tensor<string, []>("op_213_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_213_cast_fp16 = slice_by_index(begin = var_213_begin_0, end = var_213_end_0, end_mask = var_213_end_mask_0, x = var_184_cast_fp16)[name = tensor<string, []>("op_213_cast_fp16")];
+            tensor<int32, [4]> var_220_begin_0 = const()[name = tensor<string, []>("op_220_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_220_end_0 = const()[name = tensor<string, []>("op_220_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_220_end_mask_0 = const()[name = tensor<string, []>("op_220_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_220_cast_fp16 = slice_by_index(begin = var_220_begin_0, end = var_220_end_0, end_mask = var_220_end_mask_0, x = var_184_cast_fp16)[name = tensor<string, []>("op_220_cast_fp16")];
+            tensor<int32, [4]> var_227_begin_0 = const()[name = tensor<string, []>("op_227_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_227_end_0 = const()[name = tensor<string, []>("op_227_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_227_end_mask_0 = const()[name = tensor<string, []>("op_227_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = var_184_cast_fp16)[name = tensor<string, []>("op_227_cast_fp16")];
+            tensor<int32, [4]> var_234_begin_0 = const()[name = tensor<string, []>("op_234_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_234_end_0 = const()[name = tensor<string, []>("op_234_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_234_end_mask_0 = const()[name = tensor<string, []>("op_234_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = var_184_cast_fp16)[name = tensor<string, []>("op_234_cast_fp16")];
+            tensor<int32, [4]> var_241_begin_0 = const()[name = tensor<string, []>("op_241_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_241_end_0 = const()[name = tensor<string, []>("op_241_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_241_end_mask_0 = const()[name = tensor<string, []>("op_241_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_241_cast_fp16 = slice_by_index(begin = var_241_begin_0, end = var_241_end_0, end_mask = var_241_end_mask_0, x = var_188_cast_fp16)[name = tensor<string, []>("op_241_cast_fp16")];
+            tensor<int32, [4]> var_248_begin_0 = const()[name = tensor<string, []>("op_248_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_248_end_0 = const()[name = tensor<string, []>("op_248_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_248_end_mask_0 = const()[name = tensor<string, []>("op_248_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_248_cast_fp16 = slice_by_index(begin = var_248_begin_0, end = var_248_end_0, end_mask = var_248_end_mask_0, x = var_188_cast_fp16)[name = tensor<string, []>("op_248_cast_fp16")];
+            tensor<int32, [4]> var_255_begin_0 = const()[name = tensor<string, []>("op_255_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_255_end_0 = const()[name = tensor<string, []>("op_255_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_255_end_mask_0 = const()[name = tensor<string, []>("op_255_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = var_188_cast_fp16)[name = tensor<string, []>("op_255_cast_fp16")];
+            tensor<int32, [4]> var_262_begin_0 = const()[name = tensor<string, []>("op_262_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_262_end_0 = const()[name = tensor<string, []>("op_262_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_262_end_mask_0 = const()[name = tensor<string, []>("op_262_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = var_188_cast_fp16)[name = tensor<string, []>("op_262_cast_fp16")];
+            tensor<int32, [4]> var_269_begin_0 = const()[name = tensor<string, []>("op_269_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_269_end_0 = const()[name = tensor<string, []>("op_269_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_269_end_mask_0 = const()[name = tensor<string, []>("op_269_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_269_cast_fp16 = slice_by_index(begin = var_269_begin_0, end = var_269_end_0, end_mask = var_269_end_mask_0, x = var_192_cast_fp16)[name = tensor<string, []>("op_269_cast_fp16")];
+            tensor<int32, [4]> var_276_begin_0 = const()[name = tensor<string, []>("op_276_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_276_end_0 = const()[name = tensor<string, []>("op_276_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_276_end_mask_0 = const()[name = tensor<string, []>("op_276_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_276_cast_fp16 = slice_by_index(begin = var_276_begin_0, end = var_276_end_0, end_mask = var_276_end_mask_0, x = var_192_cast_fp16)[name = tensor<string, []>("op_276_cast_fp16")];
+            tensor<int32, [4]> var_283_begin_0 = const()[name = tensor<string, []>("op_283_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_283_end_0 = const()[name = tensor<string, []>("op_283_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_283_end_mask_0 = const()[name = tensor<string, []>("op_283_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = var_192_cast_fp16)[name = tensor<string, []>("op_283_cast_fp16")];
+            tensor<int32, [4]> var_290_begin_0 = const()[name = tensor<string, []>("op_290_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_290_end_0 = const()[name = tensor<string, []>("op_290_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_290_end_mask_0 = const()[name = tensor<string, []>("op_290_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = var_192_cast_fp16)[name = tensor<string, []>("op_290_cast_fp16")];
+            tensor<int32, [4]> var_297_begin_0 = const()[name = tensor<string, []>("op_297_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_297_end_0 = const()[name = tensor<string, []>("op_297_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_297_end_mask_0 = const()[name = tensor<string, []>("op_297_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_297_cast_fp16 = slice_by_index(begin = var_297_begin_0, end = var_297_end_0, end_mask = var_297_end_mask_0, x = var_196_cast_fp16)[name = tensor<string, []>("op_297_cast_fp16")];
+            tensor<int32, [4]> var_304_begin_0 = const()[name = tensor<string, []>("op_304_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_304_end_0 = const()[name = tensor<string, []>("op_304_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_304_end_mask_0 = const()[name = tensor<string, []>("op_304_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_304_cast_fp16 = slice_by_index(begin = var_304_begin_0, end = var_304_end_0, end_mask = var_304_end_mask_0, x = var_196_cast_fp16)[name = tensor<string, []>("op_304_cast_fp16")];
+            tensor<int32, [4]> var_311_begin_0 = const()[name = tensor<string, []>("op_311_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_311_end_0 = const()[name = tensor<string, []>("op_311_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_311_end_mask_0 = const()[name = tensor<string, []>("op_311_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_311_cast_fp16 = slice_by_index(begin = var_311_begin_0, end = var_311_end_0, end_mask = var_311_end_mask_0, x = var_196_cast_fp16)[name = tensor<string, []>("op_311_cast_fp16")];
+            tensor<int32, [4]> var_318_begin_0 = const()[name = tensor<string, []>("op_318_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_318_end_0 = const()[name = tensor<string, []>("op_318_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_318_end_mask_0 = const()[name = tensor<string, []>("op_318_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = var_196_cast_fp16)[name = tensor<string, []>("op_318_cast_fp16")];
+            tensor<int32, [4]> var_325_begin_0 = const()[name = tensor<string, []>("op_325_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_325_end_0 = const()[name = tensor<string, []>("op_325_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_325_end_mask_0 = const()[name = tensor<string, []>("op_325_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = var_200_cast_fp16)[name = tensor<string, []>("op_325_cast_fp16")];
+            tensor<int32, [4]> var_332_begin_0 = const()[name = tensor<string, []>("op_332_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_332_end_0 = const()[name = tensor<string, []>("op_332_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_332_end_mask_0 = const()[name = tensor<string, []>("op_332_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_332_cast_fp16 = slice_by_index(begin = var_332_begin_0, end = var_332_end_0, end_mask = var_332_end_mask_0, x = var_200_cast_fp16)[name = tensor<string, []>("op_332_cast_fp16")];
+            tensor<int32, [4]> var_339_begin_0 = const()[name = tensor<string, []>("op_339_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_339_end_0 = const()[name = tensor<string, []>("op_339_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_339_end_mask_0 = const()[name = tensor<string, []>("op_339_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = var_200_cast_fp16)[name = tensor<string, []>("op_339_cast_fp16")];
+            tensor<int32, [4]> var_346_begin_0 = const()[name = tensor<string, []>("op_346_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_346_end_0 = const()[name = tensor<string, []>("op_346_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_346_end_mask_0 = const()[name = tensor<string, []>("op_346_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = var_346_end_0, end_mask = var_346_end_mask_0, x = var_200_cast_fp16)[name = tensor<string, []>("op_346_cast_fp16")];
+            tensor<int32, [4]> var_353_begin_0 = const()[name = tensor<string, []>("op_353_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_353_end_0 = const()[name = tensor<string, []>("op_353_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_353_end_mask_0 = const()[name = tensor<string, []>("op_353_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_353_cast_fp16 = slice_by_index(begin = var_353_begin_0, end = var_353_end_0, end_mask = var_353_end_mask_0, x = var_204_cast_fp16)[name = tensor<string, []>("op_353_cast_fp16")];
+            tensor<int32, [4]> var_360_begin_0 = const()[name = tensor<string, []>("op_360_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_360_end_0 = const()[name = tensor<string, []>("op_360_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_360_end_mask_0 = const()[name = tensor<string, []>("op_360_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = var_204_cast_fp16)[name = tensor<string, []>("op_360_cast_fp16")];
+            tensor<int32, [4]> var_367_begin_0 = const()[name = tensor<string, []>("op_367_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_367_end_0 = const()[name = tensor<string, []>("op_367_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_367_end_mask_0 = const()[name = tensor<string, []>("op_367_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = var_367_end_0, end_mask = var_367_end_mask_0, x = var_204_cast_fp16)[name = tensor<string, []>("op_367_cast_fp16")];
+            tensor<int32, [4]> var_374_begin_0 = const()[name = tensor<string, []>("op_374_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_374_end_0 = const()[name = tensor<string, []>("op_374_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_374_end_mask_0 = const()[name = tensor<string, []>("op_374_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = var_374_end_0, end_mask = var_374_end_mask_0, x = var_204_cast_fp16)[name = tensor<string, []>("op_374_cast_fp16")];
+            tensor<int32, [4]> k_1_perm_0 = const()[name = tensor<string, []>("k_1_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_379_begin_0 = const()[name = tensor<string, []>("op_379_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_379_end_0 = const()[name = tensor<string, []>("op_379_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_379_end_mask_0 = const()[name = tensor<string, []>("op_379_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 384]> k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_379_cast_fp16 = slice_by_index(begin = var_379_begin_0, end = var_379_end_0, end_mask = var_379_end_mask_0, x = k_1_cast_fp16)[name = tensor<string, []>("op_379_cast_fp16")];
+            tensor<int32, [4]> var_383_begin_0 = const()[name = tensor<string, []>("op_383_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_383_end_0 = const()[name = tensor<string, []>("op_383_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_383_end_mask_0 = const()[name = tensor<string, []>("op_383_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = k_1_cast_fp16)[name = tensor<string, []>("op_383_cast_fp16")];
+            tensor<int32, [4]> var_387_begin_0 = const()[name = tensor<string, []>("op_387_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_387_end_0 = const()[name = tensor<string, []>("op_387_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_387_end_mask_0 = const()[name = tensor<string, []>("op_387_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_387_cast_fp16 = slice_by_index(begin = var_387_begin_0, end = var_387_end_0, end_mask = var_387_end_mask_0, x = k_1_cast_fp16)[name = tensor<string, []>("op_387_cast_fp16")];
+            tensor<int32, [4]> var_391_begin_0 = const()[name = tensor<string, []>("op_391_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_391_end_0 = const()[name = tensor<string, []>("op_391_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_391_end_mask_0 = const()[name = tensor<string, []>("op_391_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_391_cast_fp16 = slice_by_index(begin = var_391_begin_0, end = var_391_end_0, end_mask = var_391_end_mask_0, x = k_1_cast_fp16)[name = tensor<string, []>("op_391_cast_fp16")];
+            tensor<int32, [4]> var_395_begin_0 = const()[name = tensor<string, []>("op_395_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_395_end_0 = const()[name = tensor<string, []>("op_395_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_395_end_mask_0 = const()[name = tensor<string, []>("op_395_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = k_1_cast_fp16)[name = tensor<string, []>("op_395_cast_fp16")];
+            tensor<int32, [4]> var_399_begin_0 = const()[name = tensor<string, []>("op_399_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_399_end_0 = const()[name = tensor<string, []>("op_399_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_399_end_mask_0 = const()[name = tensor<string, []>("op_399_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = var_399_end_0, end_mask = var_399_end_mask_0, x = k_1_cast_fp16)[name = tensor<string, []>("op_399_cast_fp16")];
+            tensor<int32, [4]> var_401_begin_0 = const()[name = tensor<string, []>("op_401_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_401_end_0 = const()[name = tensor<string, []>("op_401_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_401_end_mask_0 = const()[name = tensor<string, []>("op_401_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_401_cast_fp16 = slice_by_index(begin = var_401_begin_0, end = var_401_end_0, end_mask = var_401_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_401_cast_fp16")];
+            tensor<int32, [4]> var_405_begin_0 = const()[name = tensor<string, []>("op_405_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_405_end_0 = const()[name = tensor<string, []>("op_405_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_405_end_mask_0 = const()[name = tensor<string, []>("op_405_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_405_cast_fp16 = slice_by_index(begin = var_405_begin_0, end = var_405_end_0, end_mask = var_405_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_405_cast_fp16")];
+            tensor<int32, [4]> var_409_begin_0 = const()[name = tensor<string, []>("op_409_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_409_end_0 = const()[name = tensor<string, []>("op_409_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_409_end_mask_0 = const()[name = tensor<string, []>("op_409_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_409_cast_fp16 = slice_by_index(begin = var_409_begin_0, end = var_409_end_0, end_mask = var_409_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_409_cast_fp16")];
+            tensor<int32, [4]> var_413_begin_0 = const()[name = tensor<string, []>("op_413_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_413_end_0 = const()[name = tensor<string, []>("op_413_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_413_end_mask_0 = const()[name = tensor<string, []>("op_413_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_413_cast_fp16 = slice_by_index(begin = var_413_begin_0, end = var_413_end_0, end_mask = var_413_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_413_cast_fp16")];
+            tensor<int32, [4]> var_417_begin_0 = const()[name = tensor<string, []>("op_417_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_417_end_0 = const()[name = tensor<string, []>("op_417_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_417_end_mask_0 = const()[name = tensor<string, []>("op_417_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_417_cast_fp16 = slice_by_index(begin = var_417_begin_0, end = var_417_end_0, end_mask = var_417_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_417_cast_fp16")];
+            tensor<int32, [4]> var_421_begin_0 = const()[name = tensor<string, []>("op_421_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_421_end_0 = const()[name = tensor<string, []>("op_421_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_421_end_mask_0 = const()[name = tensor<string, []>("op_421_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_421_cast_fp16 = slice_by_index(begin = var_421_begin_0, end = var_421_end_0, end_mask = var_421_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_421_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_1_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_1_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1_equation_0, values = (var_379_cast_fp16, var_213_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_1_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_3_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_3_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3_equation_0, values = (var_379_cast_fp16, var_220_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_3_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_5_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_5_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5_equation_0, values = (var_379_cast_fp16, var_227_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_5_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_7_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_7_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_7_equation_0, values = (var_379_cast_fp16, var_234_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_7_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_9_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_9_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_9_equation_0, values = (var_383_cast_fp16, var_241_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_9_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_11_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_11_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_11_equation_0, values = (var_383_cast_fp16, var_248_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_11_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_13_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_13_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_13_equation_0, values = (var_383_cast_fp16, var_255_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_13_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_15_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_15_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_15_equation_0, values = (var_383_cast_fp16, var_262_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_15_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_17_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_17_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_17_equation_0, values = (var_387_cast_fp16, var_269_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_17_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_19_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_19_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_19_equation_0, values = (var_387_cast_fp16, var_276_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_19_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_21_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_21_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_21_equation_0, values = (var_387_cast_fp16, var_283_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_21_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_23_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_23_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_23_equation_0, values = (var_387_cast_fp16, var_290_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_23_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_25_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_25_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_25_equation_0, values = (var_391_cast_fp16, var_297_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_25_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_27_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_27_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_27_equation_0, values = (var_391_cast_fp16, var_304_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_27_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_29_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_29_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_29_equation_0, values = (var_391_cast_fp16, var_311_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_29_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_31_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_31_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_31_equation_0, values = (var_391_cast_fp16, var_318_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_31_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_33_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_33_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_33_equation_0, values = (var_395_cast_fp16, var_325_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_33_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_35_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_35_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_35_equation_0, values = (var_395_cast_fp16, var_332_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_35_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_37_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_37_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_37_equation_0, values = (var_395_cast_fp16, var_339_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_37_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_39_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_39_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_39_equation_0, values = (var_395_cast_fp16, var_346_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_39_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_41_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_41_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_41_equation_0, values = (var_399_cast_fp16, var_353_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_41_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_43_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_43_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_43_equation_0, values = (var_399_cast_fp16, var_360_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_43_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_45_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_45_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_45_equation_0, values = (var_399_cast_fp16, var_367_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_45_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_47_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_47_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_47_equation_0, values = (var_399_cast_fp16, var_374_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_47_cast_fp16")];
+            tensor<fp16, []> var_472_to_fp16 = const()[name = tensor<string, []>("op_472_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1_cast_fp16, y = var_472_to_fp16)[name = tensor<string, []>("aw_chunk_1_cast_fp16")];
+            tensor<fp16, []> var_474_to_fp16 = const()[name = tensor<string, []>("op_474_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_3_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3_cast_fp16, y = var_474_to_fp16)[name = tensor<string, []>("aw_chunk_3_cast_fp16")];
+            tensor<fp16, []> var_476_to_fp16 = const()[name = tensor<string, []>("op_476_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_5_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5_cast_fp16, y = var_476_to_fp16)[name = tensor<string, []>("aw_chunk_5_cast_fp16")];
+            tensor<fp16, []> var_478_to_fp16 = const()[name = tensor<string, []>("op_478_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_7_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_7_cast_fp16, y = var_478_to_fp16)[name = tensor<string, []>("aw_chunk_7_cast_fp16")];
+            tensor<fp16, []> var_480_to_fp16 = const()[name = tensor<string, []>("op_480_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_9_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_9_cast_fp16, y = var_480_to_fp16)[name = tensor<string, []>("aw_chunk_9_cast_fp16")];
+            tensor<fp16, []> var_482_to_fp16 = const()[name = tensor<string, []>("op_482_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_11_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_11_cast_fp16, y = var_482_to_fp16)[name = tensor<string, []>("aw_chunk_11_cast_fp16")];
+            tensor<fp16, []> var_484_to_fp16 = const()[name = tensor<string, []>("op_484_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_13_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_13_cast_fp16, y = var_484_to_fp16)[name = tensor<string, []>("aw_chunk_13_cast_fp16")];
+            tensor<fp16, []> var_486_to_fp16 = const()[name = tensor<string, []>("op_486_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_15_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_15_cast_fp16, y = var_486_to_fp16)[name = tensor<string, []>("aw_chunk_15_cast_fp16")];
+            tensor<fp16, []> var_488_to_fp16 = const()[name = tensor<string, []>("op_488_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_17_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_17_cast_fp16, y = var_488_to_fp16)[name = tensor<string, []>("aw_chunk_17_cast_fp16")];
+            tensor<fp16, []> var_490_to_fp16 = const()[name = tensor<string, []>("op_490_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_19_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_19_cast_fp16, y = var_490_to_fp16)[name = tensor<string, []>("aw_chunk_19_cast_fp16")];
+            tensor<fp16, []> var_492_to_fp16 = const()[name = tensor<string, []>("op_492_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_21_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_21_cast_fp16, y = var_492_to_fp16)[name = tensor<string, []>("aw_chunk_21_cast_fp16")];
+            tensor<fp16, []> var_494_to_fp16 = const()[name = tensor<string, []>("op_494_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_23_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_23_cast_fp16, y = var_494_to_fp16)[name = tensor<string, []>("aw_chunk_23_cast_fp16")];
+            tensor<fp16, []> var_496_to_fp16 = const()[name = tensor<string, []>("op_496_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_25_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_25_cast_fp16, y = var_496_to_fp16)[name = tensor<string, []>("aw_chunk_25_cast_fp16")];
+            tensor<fp16, []> var_498_to_fp16 = const()[name = tensor<string, []>("op_498_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_27_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_27_cast_fp16, y = var_498_to_fp16)[name = tensor<string, []>("aw_chunk_27_cast_fp16")];
+            tensor<fp16, []> var_500_to_fp16 = const()[name = tensor<string, []>("op_500_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_29_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_29_cast_fp16, y = var_500_to_fp16)[name = tensor<string, []>("aw_chunk_29_cast_fp16")];
+            tensor<fp16, []> var_502_to_fp16 = const()[name = tensor<string, []>("op_502_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_31_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_31_cast_fp16, y = var_502_to_fp16)[name = tensor<string, []>("aw_chunk_31_cast_fp16")];
+            tensor<fp16, []> var_504_to_fp16 = const()[name = tensor<string, []>("op_504_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_33_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_33_cast_fp16, y = var_504_to_fp16)[name = tensor<string, []>("aw_chunk_33_cast_fp16")];
+            tensor<fp16, []> var_506_to_fp16 = const()[name = tensor<string, []>("op_506_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_35_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_35_cast_fp16, y = var_506_to_fp16)[name = tensor<string, []>("aw_chunk_35_cast_fp16")];
+            tensor<fp16, []> var_508_to_fp16 = const()[name = tensor<string, []>("op_508_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_37_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_37_cast_fp16, y = var_508_to_fp16)[name = tensor<string, []>("aw_chunk_37_cast_fp16")];
+            tensor<fp16, []> var_510_to_fp16 = const()[name = tensor<string, []>("op_510_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_39_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_39_cast_fp16, y = var_510_to_fp16)[name = tensor<string, []>("aw_chunk_39_cast_fp16")];
+            tensor<fp16, []> var_512_to_fp16 = const()[name = tensor<string, []>("op_512_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_41_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_41_cast_fp16, y = var_512_to_fp16)[name = tensor<string, []>("aw_chunk_41_cast_fp16")];
+            tensor<fp16, []> var_514_to_fp16 = const()[name = tensor<string, []>("op_514_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_43_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_43_cast_fp16, y = var_514_to_fp16)[name = tensor<string, []>("aw_chunk_43_cast_fp16")];
+            tensor<fp16, []> var_516_to_fp16 = const()[name = tensor<string, []>("op_516_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_45_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_45_cast_fp16, y = var_516_to_fp16)[name = tensor<string, []>("aw_chunk_45_cast_fp16")];
+            tensor<fp16, []> var_518_to_fp16 = const()[name = tensor<string, []>("op_518_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_47_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_47_cast_fp16, y = var_518_to_fp16)[name = tensor<string, []>("aw_chunk_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_520_cast_fp16 = softmax(axis = var_129, x = aw_chunk_1_cast_fp16)[name = tensor<string, []>("op_520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_521_cast_fp16 = softmax(axis = var_129, x = aw_chunk_3_cast_fp16)[name = tensor<string, []>("op_521_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_522_cast_fp16 = softmax(axis = var_129, x = aw_chunk_5_cast_fp16)[name = tensor<string, []>("op_522_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_523_cast_fp16 = softmax(axis = var_129, x = aw_chunk_7_cast_fp16)[name = tensor<string, []>("op_523_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_524_cast_fp16 = softmax(axis = var_129, x = aw_chunk_9_cast_fp16)[name = tensor<string, []>("op_524_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_525_cast_fp16 = softmax(axis = var_129, x = aw_chunk_11_cast_fp16)[name = tensor<string, []>("op_525_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_526_cast_fp16 = softmax(axis = var_129, x = aw_chunk_13_cast_fp16)[name = tensor<string, []>("op_526_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_527_cast_fp16 = softmax(axis = var_129, x = aw_chunk_15_cast_fp16)[name = tensor<string, []>("op_527_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_528_cast_fp16 = softmax(axis = var_129, x = aw_chunk_17_cast_fp16)[name = tensor<string, []>("op_528_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_529_cast_fp16 = softmax(axis = var_129, x = aw_chunk_19_cast_fp16)[name = tensor<string, []>("op_529_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_530_cast_fp16 = softmax(axis = var_129, x = aw_chunk_21_cast_fp16)[name = tensor<string, []>("op_530_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_531_cast_fp16 = softmax(axis = var_129, x = aw_chunk_23_cast_fp16)[name = tensor<string, []>("op_531_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_532_cast_fp16 = softmax(axis = var_129, x = aw_chunk_25_cast_fp16)[name = tensor<string, []>("op_532_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_533_cast_fp16 = softmax(axis = var_129, x = aw_chunk_27_cast_fp16)[name = tensor<string, []>("op_533_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_534_cast_fp16 = softmax(axis = var_129, x = aw_chunk_29_cast_fp16)[name = tensor<string, []>("op_534_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_535_cast_fp16 = softmax(axis = var_129, x = aw_chunk_31_cast_fp16)[name = tensor<string, []>("op_535_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_536_cast_fp16 = softmax(axis = var_129, x = aw_chunk_33_cast_fp16)[name = tensor<string, []>("op_536_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_537_cast_fp16 = softmax(axis = var_129, x = aw_chunk_35_cast_fp16)[name = tensor<string, []>("op_537_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_538_cast_fp16 = softmax(axis = var_129, x = aw_chunk_37_cast_fp16)[name = tensor<string, []>("op_538_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_539_cast_fp16 = softmax(axis = var_129, x = aw_chunk_39_cast_fp16)[name = tensor<string, []>("op_539_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_540_cast_fp16 = softmax(axis = var_129, x = aw_chunk_41_cast_fp16)[name = tensor<string, []>("op_540_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_541_cast_fp16 = softmax(axis = var_129, x = aw_chunk_43_cast_fp16)[name = tensor<string, []>("op_541_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_542_cast_fp16 = softmax(axis = var_129, x = aw_chunk_45_cast_fp16)[name = tensor<string, []>("op_542_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_543_cast_fp16 = softmax(axis = var_129, x = aw_chunk_47_cast_fp16)[name = tensor<string, []>("op_543_cast_fp16")];
+            tensor<string, []> var_545_equation_0 = const()[name = tensor<string, []>("op_545_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_545_cast_fp16 = einsum(equation = var_545_equation_0, values = (var_401_cast_fp16, var_520_cast_fp16))[name = tensor<string, []>("op_545_cast_fp16")];
+            tensor<string, []> var_547_equation_0 = const()[name = tensor<string, []>("op_547_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_401_cast_fp16, var_521_cast_fp16))[name = tensor<string, []>("op_547_cast_fp16")];
+            tensor<string, []> var_549_equation_0 = const()[name = tensor<string, []>("op_549_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_549_cast_fp16 = einsum(equation = var_549_equation_0, values = (var_401_cast_fp16, var_522_cast_fp16))[name = tensor<string, []>("op_549_cast_fp16")];
+            tensor<string, []> var_551_equation_0 = const()[name = tensor<string, []>("op_551_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_401_cast_fp16, var_523_cast_fp16))[name = tensor<string, []>("op_551_cast_fp16")];
+            tensor<string, []> var_553_equation_0 = const()[name = tensor<string, []>("op_553_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_553_cast_fp16 = einsum(equation = var_553_equation_0, values = (var_405_cast_fp16, var_524_cast_fp16))[name = tensor<string, []>("op_553_cast_fp16")];
+            tensor<string, []> var_555_equation_0 = const()[name = tensor<string, []>("op_555_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_405_cast_fp16, var_525_cast_fp16))[name = tensor<string, []>("op_555_cast_fp16")];
+            tensor<string, []> var_557_equation_0 = const()[name = tensor<string, []>("op_557_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_557_cast_fp16 = einsum(equation = var_557_equation_0, values = (var_405_cast_fp16, var_526_cast_fp16))[name = tensor<string, []>("op_557_cast_fp16")];
+            tensor<string, []> var_559_equation_0 = const()[name = tensor<string, []>("op_559_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_405_cast_fp16, var_527_cast_fp16))[name = tensor<string, []>("op_559_cast_fp16")];
+            tensor<string, []> var_561_equation_0 = const()[name = tensor<string, []>("op_561_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_561_cast_fp16 = einsum(equation = var_561_equation_0, values = (var_409_cast_fp16, var_528_cast_fp16))[name = tensor<string, []>("op_561_cast_fp16")];
+            tensor<string, []> var_563_equation_0 = const()[name = tensor<string, []>("op_563_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_409_cast_fp16, var_529_cast_fp16))[name = tensor<string, []>("op_563_cast_fp16")];
+            tensor<string, []> var_565_equation_0 = const()[name = tensor<string, []>("op_565_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_565_cast_fp16 = einsum(equation = var_565_equation_0, values = (var_409_cast_fp16, var_530_cast_fp16))[name = tensor<string, []>("op_565_cast_fp16")];
+            tensor<string, []> var_567_equation_0 = const()[name = tensor<string, []>("op_567_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_409_cast_fp16, var_531_cast_fp16))[name = tensor<string, []>("op_567_cast_fp16")];
+            tensor<string, []> var_569_equation_0 = const()[name = tensor<string, []>("op_569_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_569_cast_fp16 = einsum(equation = var_569_equation_0, values = (var_413_cast_fp16, var_532_cast_fp16))[name = tensor<string, []>("op_569_cast_fp16")];
+            tensor<string, []> var_571_equation_0 = const()[name = tensor<string, []>("op_571_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_413_cast_fp16, var_533_cast_fp16))[name = tensor<string, []>("op_571_cast_fp16")];
+            tensor<string, []> var_573_equation_0 = const()[name = tensor<string, []>("op_573_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_573_cast_fp16 = einsum(equation = var_573_equation_0, values = (var_413_cast_fp16, var_534_cast_fp16))[name = tensor<string, []>("op_573_cast_fp16")];
+            tensor<string, []> var_575_equation_0 = const()[name = tensor<string, []>("op_575_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_575_cast_fp16 = einsum(equation = var_575_equation_0, values = (var_413_cast_fp16, var_535_cast_fp16))[name = tensor<string, []>("op_575_cast_fp16")];
+            tensor<string, []> var_577_equation_0 = const()[name = tensor<string, []>("op_577_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_577_cast_fp16 = einsum(equation = var_577_equation_0, values = (var_417_cast_fp16, var_536_cast_fp16))[name = tensor<string, []>("op_577_cast_fp16")];
+            tensor<string, []> var_579_equation_0 = const()[name = tensor<string, []>("op_579_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_579_cast_fp16 = einsum(equation = var_579_equation_0, values = (var_417_cast_fp16, var_537_cast_fp16))[name = tensor<string, []>("op_579_cast_fp16")];
+            tensor<string, []> var_581_equation_0 = const()[name = tensor<string, []>("op_581_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_581_cast_fp16 = einsum(equation = var_581_equation_0, values = (var_417_cast_fp16, var_538_cast_fp16))[name = tensor<string, []>("op_581_cast_fp16")];
+            tensor<string, []> var_583_equation_0 = const()[name = tensor<string, []>("op_583_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_583_cast_fp16 = einsum(equation = var_583_equation_0, values = (var_417_cast_fp16, var_539_cast_fp16))[name = tensor<string, []>("op_583_cast_fp16")];
+            tensor<string, []> var_585_equation_0 = const()[name = tensor<string, []>("op_585_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_585_cast_fp16 = einsum(equation = var_585_equation_0, values = (var_421_cast_fp16, var_540_cast_fp16))[name = tensor<string, []>("op_585_cast_fp16")];
+            tensor<string, []> var_587_equation_0 = const()[name = tensor<string, []>("op_587_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_587_cast_fp16 = einsum(equation = var_587_equation_0, values = (var_421_cast_fp16, var_541_cast_fp16))[name = tensor<string, []>("op_587_cast_fp16")];
+            tensor<string, []> var_589_equation_0 = const()[name = tensor<string, []>("op_589_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_589_cast_fp16 = einsum(equation = var_589_equation_0, values = (var_421_cast_fp16, var_542_cast_fp16))[name = tensor<string, []>("op_589_cast_fp16")];
+            tensor<string, []> var_591_equation_0 = const()[name = tensor<string, []>("op_591_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_591_cast_fp16 = einsum(equation = var_591_equation_0, values = (var_421_cast_fp16, var_543_cast_fp16))[name = tensor<string, []>("op_591_cast_fp16")];
+            tensor<bool, []> var_593_interleave_0 = const()[name = tensor<string, []>("op_593_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_593_cast_fp16 = concat(axis = var_118, interleave = var_593_interleave_0, values = (var_545_cast_fp16, var_547_cast_fp16, var_549_cast_fp16, var_551_cast_fp16))[name = tensor<string, []>("op_593_cast_fp16")];
+            tensor<bool, []> var_595_interleave_0 = const()[name = tensor<string, []>("op_595_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_595_cast_fp16 = concat(axis = var_118, interleave = var_595_interleave_0, values = (var_553_cast_fp16, var_555_cast_fp16, var_557_cast_fp16, var_559_cast_fp16))[name = tensor<string, []>("op_595_cast_fp16")];
+            tensor<bool, []> var_597_interleave_0 = const()[name = tensor<string, []>("op_597_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_597_cast_fp16 = concat(axis = var_118, interleave = var_597_interleave_0, values = (var_561_cast_fp16, var_563_cast_fp16, var_565_cast_fp16, var_567_cast_fp16))[name = tensor<string, []>("op_597_cast_fp16")];
+            tensor<bool, []> var_599_interleave_0 = const()[name = tensor<string, []>("op_599_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_599_cast_fp16 = concat(axis = var_118, interleave = var_599_interleave_0, values = (var_569_cast_fp16, var_571_cast_fp16, var_573_cast_fp16, var_575_cast_fp16))[name = tensor<string, []>("op_599_cast_fp16")];
+            tensor<bool, []> var_601_interleave_0 = const()[name = tensor<string, []>("op_601_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_601_cast_fp16 = concat(axis = var_118, interleave = var_601_interleave_0, values = (var_577_cast_fp16, var_579_cast_fp16, var_581_cast_fp16, var_583_cast_fp16))[name = tensor<string, []>("op_601_cast_fp16")];
+            tensor<bool, []> var_603_interleave_0 = const()[name = tensor<string, []>("op_603_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_603_cast_fp16 = concat(axis = var_118, interleave = var_603_interleave_0, values = (var_585_cast_fp16, var_587_cast_fp16, var_589_cast_fp16, var_591_cast_fp16))[name = tensor<string, []>("op_603_cast_fp16")];
+            tensor<bool, []> input_1_interleave_0 = const()[name = tensor<string, []>("input_1_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_1_cast_fp16 = concat(axis = var_129, interleave = input_1_interleave_0, values = (var_593_cast_fp16, var_595_cast_fp16, var_597_cast_fp16, var_599_cast_fp16, var_601_cast_fp16, var_603_cast_fp16))[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> obj_3_pad_type_0 = const()[name = tensor<string, []>("obj_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_3_strides_0 = const()[name = tensor<string, []>("obj_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_3_pad_0 = const()[name = tensor<string, []>("obj_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_3_dilations_0 = const()[name = tensor<string, []>("obj_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_3_groups_0 = const()[name = tensor<string, []>("obj_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3112896)))];
+            tensor<fp16, [384]> layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3407872)))];
+            tensor<fp16, [1, 384, 1, 1500]> obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = obj_3_dilations_0, groups = obj_3_groups_0, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = obj_3_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("obj_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> out_3_axes_0 = const()[name = tensor<string, []>("out_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_622_to_fp16 = const()[name = tensor<string, []>("op_622_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_622_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
+            tensor<fp16, [384]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3408704)))];
+            tensor<fp16, [384]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3409536)))];
+            tensor<fp16, []> input_3_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_3_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> input_5_pad_type_0 = const()[name = tensor<string, []>("input_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_5_strides_0 = const()[name = tensor<string, []>("input_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_5_pad_0 = const()[name = tensor<string, []>("input_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_5_dilations_0 = const()[name = tensor<string, []>("input_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_5_groups_0 = const()[name = tensor<string, []>("input_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> layers_0_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3410368)))];
+            tensor<fp16, [1536]> layers_0_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4590080)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> input_7_mode_0 = const()[name = tensor<string, []>("input_7_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> hidden_states_5_strides_0 = const()[name = tensor<string, []>("hidden_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> hidden_states_5_dilations_0 = const()[name = tensor<string, []>("hidden_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> hidden_states_5_groups_0 = const()[name = tensor<string, []>("hidden_states_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> layers_0_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4593216)))];
+            tensor<fp16, [384]> layers_0_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5772928)))];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_651 = const()[name = tensor<string, []>("op_651"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_662 = const()[name = tensor<string, []>("op_662"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> out_5_axes_0 = const()[name = tensor<string, []>("out_5_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_679_to_fp16 = const()[name = tensor<string, []>("op_679_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_679_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
+            tensor<fp16, [384]> obj_5_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5773760)))];
+            tensor<fp16, [384]> obj_5_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_5_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5774592)))];
+            tensor<fp16, []> obj_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor<string, []>("obj_5_cast_fp16")];
+            tensor<string, []> query_3_pad_type_0 = const()[name = tensor<string, []>("query_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_3_strides_0 = const()[name = tensor<string, []>("query_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_3_pad_0 = const()[name = tensor<string, []>("query_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_3_dilations_0 = const()[name = tensor<string, []>("query_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_3_groups_0 = const()[name = tensor<string, []>("query_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5775424)))];
+            tensor<fp16, [384]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6070400)))];
+            tensor<fp16, [1, 384, 1, 1500]> query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = query_3_dilations_0, groups = query_3_groups_0, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = query_3_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
+            tensor<string, []> key_3_pad_type_0 = const()[name = tensor<string, []>("key_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> key_3_strides_0 = const()[name = tensor<string, []>("key_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> key_3_pad_0 = const()[name = tensor<string, []>("key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> key_3_dilations_0 = const()[name = tensor<string, []>("key_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> key_3_groups_0 = const()[name = tensor<string, []>("key_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6071232)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_3_cast_fp16 = conv(dilations = key_3_dilations_0, groups = key_3_groups_0, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = key_3_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("key_3_cast_fp16")];
+            tensor<string, []> value_3_pad_type_0 = const()[name = tensor<string, []>("value_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> value_3_strides_0 = const()[name = tensor<string, []>("value_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> value_3_pad_0 = const()[name = tensor<string, []>("value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> value_3_dilations_0 = const()[name = tensor<string, []>("value_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> value_3_groups_0 = const()[name = tensor<string, []>("value_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6366208)))];
+            tensor<fp16, [384]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6661184)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = value_3_dilations_0, groups = value_3_groups_0, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("value_3_cast_fp16")];
+            tensor<int32, [4]> var_717_begin_0 = const()[name = tensor<string, []>("op_717_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_717_end_0 = const()[name = tensor<string, []>("op_717_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_717_end_mask_0 = const()[name = tensor<string, []>("op_717_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_717_cast_fp16 = slice_by_index(begin = var_717_begin_0, end = var_717_end_0, end_mask = var_717_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_717_cast_fp16")];
+            tensor<int32, [4]> var_721_begin_0 = const()[name = tensor<string, []>("op_721_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_721_end_0 = const()[name = tensor<string, []>("op_721_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_721_end_mask_0 = const()[name = tensor<string, []>("op_721_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_721_cast_fp16 = slice_by_index(begin = var_721_begin_0, end = var_721_end_0, end_mask = var_721_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_721_cast_fp16")];
+            tensor<int32, [4]> var_725_begin_0 = const()[name = tensor<string, []>("op_725_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_725_end_0 = const()[name = tensor<string, []>("op_725_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_725_end_mask_0 = const()[name = tensor<string, []>("op_725_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_725_cast_fp16 = slice_by_index(begin = var_725_begin_0, end = var_725_end_0, end_mask = var_725_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_725_cast_fp16")];
+            tensor<int32, [4]> var_729_begin_0 = const()[name = tensor<string, []>("op_729_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_729_end_0 = const()[name = tensor<string, []>("op_729_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_729_end_mask_0 = const()[name = tensor<string, []>("op_729_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_729_cast_fp16 = slice_by_index(begin = var_729_begin_0, end = var_729_end_0, end_mask = var_729_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_729_cast_fp16")];
+            tensor<int32, [4]> var_733_begin_0 = const()[name = tensor<string, []>("op_733_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_733_end_0 = const()[name = tensor<string, []>("op_733_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_733_end_mask_0 = const()[name = tensor<string, []>("op_733_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_733_cast_fp16 = slice_by_index(begin = var_733_begin_0, end = var_733_end_0, end_mask = var_733_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_733_cast_fp16")];
+            tensor<int32, [4]> var_737_begin_0 = const()[name = tensor<string, []>("op_737_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_737_end_0 = const()[name = tensor<string, []>("op_737_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_737_end_mask_0 = const()[name = tensor<string, []>("op_737_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_737_cast_fp16 = slice_by_index(begin = var_737_begin_0, end = var_737_end_0, end_mask = var_737_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_737_cast_fp16")];
+            tensor<int32, [4]> var_746_begin_0 = const()[name = tensor<string, []>("op_746_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_746_end_0 = const()[name = tensor<string, []>("op_746_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_746_end_mask_0 = const()[name = tensor<string, []>("op_746_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = var_717_cast_fp16)[name = tensor<string, []>("op_746_cast_fp16")];
+            tensor<int32, [4]> var_753_begin_0 = const()[name = tensor<string, []>("op_753_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_753_end_0 = const()[name = tensor<string, []>("op_753_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_753_end_mask_0 = const()[name = tensor<string, []>("op_753_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_753_cast_fp16 = slice_by_index(begin = var_753_begin_0, end = var_753_end_0, end_mask = var_753_end_mask_0, x = var_717_cast_fp16)[name = tensor<string, []>("op_753_cast_fp16")];
+            tensor<int32, [4]> var_760_begin_0 = const()[name = tensor<string, []>("op_760_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_760_end_0 = const()[name = tensor<string, []>("op_760_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_760_end_mask_0 = const()[name = tensor<string, []>("op_760_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_760_cast_fp16 = slice_by_index(begin = var_760_begin_0, end = var_760_end_0, end_mask = var_760_end_mask_0, x = var_717_cast_fp16)[name = tensor<string, []>("op_760_cast_fp16")];
+            tensor<int32, [4]> var_767_begin_0 = const()[name = tensor<string, []>("op_767_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_767_end_0 = const()[name = tensor<string, []>("op_767_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_767_end_mask_0 = const()[name = tensor<string, []>("op_767_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_767_cast_fp16 = slice_by_index(begin = var_767_begin_0, end = var_767_end_0, end_mask = var_767_end_mask_0, x = var_717_cast_fp16)[name = tensor<string, []>("op_767_cast_fp16")];
+            tensor<int32, [4]> var_774_begin_0 = const()[name = tensor<string, []>("op_774_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_774_end_0 = const()[name = tensor<string, []>("op_774_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_774_end_mask_0 = const()[name = tensor<string, []>("op_774_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_774_cast_fp16 = slice_by_index(begin = var_774_begin_0, end = var_774_end_0, end_mask = var_774_end_mask_0, x = var_721_cast_fp16)[name = tensor<string, []>("op_774_cast_fp16")];
+            tensor<int32, [4]> var_781_begin_0 = const()[name = tensor<string, []>("op_781_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_781_end_0 = const()[name = tensor<string, []>("op_781_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_781_end_mask_0 = const()[name = tensor<string, []>("op_781_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_781_cast_fp16 = slice_by_index(begin = var_781_begin_0, end = var_781_end_0, end_mask = var_781_end_mask_0, x = var_721_cast_fp16)[name = tensor<string, []>("op_781_cast_fp16")];
+            tensor<int32, [4]> var_788_begin_0 = const()[name = tensor<string, []>("op_788_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_788_end_0 = const()[name = tensor<string, []>("op_788_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_788_end_mask_0 = const()[name = tensor<string, []>("op_788_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_788_cast_fp16 = slice_by_index(begin = var_788_begin_0, end = var_788_end_0, end_mask = var_788_end_mask_0, x = var_721_cast_fp16)[name = tensor<string, []>("op_788_cast_fp16")];
+            tensor<int32, [4]> var_795_begin_0 = const()[name = tensor<string, []>("op_795_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_795_end_0 = const()[name = tensor<string, []>("op_795_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_795_end_mask_0 = const()[name = tensor<string, []>("op_795_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_795_cast_fp16 = slice_by_index(begin = var_795_begin_0, end = var_795_end_0, end_mask = var_795_end_mask_0, x = var_721_cast_fp16)[name = tensor<string, []>("op_795_cast_fp16")];
+            tensor<int32, [4]> var_802_begin_0 = const()[name = tensor<string, []>("op_802_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_802_end_0 = const()[name = tensor<string, []>("op_802_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_802_end_mask_0 = const()[name = tensor<string, []>("op_802_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_802_cast_fp16 = slice_by_index(begin = var_802_begin_0, end = var_802_end_0, end_mask = var_802_end_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_802_cast_fp16")];
+            tensor<int32, [4]> var_809_begin_0 = const()[name = tensor<string, []>("op_809_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_809_end_0 = const()[name = tensor<string, []>("op_809_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_809_end_mask_0 = const()[name = tensor<string, []>("op_809_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_809_cast_fp16 = slice_by_index(begin = var_809_begin_0, end = var_809_end_0, end_mask = var_809_end_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_809_cast_fp16")];
+            tensor<int32, [4]> var_816_begin_0 = const()[name = tensor<string, []>("op_816_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_816_end_0 = const()[name = tensor<string, []>("op_816_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_816_end_mask_0 = const()[name = tensor<string, []>("op_816_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_816_cast_fp16 = slice_by_index(begin = var_816_begin_0, end = var_816_end_0, end_mask = var_816_end_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
+            tensor<int32, [4]> var_823_begin_0 = const()[name = tensor<string, []>("op_823_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_823_end_0 = const()[name = tensor<string, []>("op_823_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_823_end_mask_0 = const()[name = tensor<string, []>("op_823_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_823_cast_fp16 = slice_by_index(begin = var_823_begin_0, end = var_823_end_0, end_mask = var_823_end_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_823_cast_fp16")];
+            tensor<int32, [4]> var_830_begin_0 = const()[name = tensor<string, []>("op_830_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_830_end_0 = const()[name = tensor<string, []>("op_830_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_830_end_mask_0 = const()[name = tensor<string, []>("op_830_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_830_cast_fp16 = slice_by_index(begin = var_830_begin_0, end = var_830_end_0, end_mask = var_830_end_mask_0, x = var_729_cast_fp16)[name = tensor<string, []>("op_830_cast_fp16")];
+            tensor<int32, [4]> var_837_begin_0 = const()[name = tensor<string, []>("op_837_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_837_end_0 = const()[name = tensor<string, []>("op_837_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_837_end_mask_0 = const()[name = tensor<string, []>("op_837_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = var_729_cast_fp16)[name = tensor<string, []>("op_837_cast_fp16")];
+            tensor<int32, [4]> var_844_begin_0 = const()[name = tensor<string, []>("op_844_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_844_end_0 = const()[name = tensor<string, []>("op_844_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_844_end_mask_0 = const()[name = tensor<string, []>("op_844_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_844_cast_fp16 = slice_by_index(begin = var_844_begin_0, end = var_844_end_0, end_mask = var_844_end_mask_0, x = var_729_cast_fp16)[name = tensor<string, []>("op_844_cast_fp16")];
+            tensor<int32, [4]> var_851_begin_0 = const()[name = tensor<string, []>("op_851_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_851_end_0 = const()[name = tensor<string, []>("op_851_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_851_end_mask_0 = const()[name = tensor<string, []>("op_851_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_851_cast_fp16 = slice_by_index(begin = var_851_begin_0, end = var_851_end_0, end_mask = var_851_end_mask_0, x = var_729_cast_fp16)[name = tensor<string, []>("op_851_cast_fp16")];
+            tensor<int32, [4]> var_858_begin_0 = const()[name = tensor<string, []>("op_858_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_858_end_0 = const()[name = tensor<string, []>("op_858_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_858_end_mask_0 = const()[name = tensor<string, []>("op_858_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_858_cast_fp16 = slice_by_index(begin = var_858_begin_0, end = var_858_end_0, end_mask = var_858_end_mask_0, x = var_733_cast_fp16)[name = tensor<string, []>("op_858_cast_fp16")];
+            tensor<int32, [4]> var_865_begin_0 = const()[name = tensor<string, []>("op_865_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_865_end_0 = const()[name = tensor<string, []>("op_865_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_865_end_mask_0 = const()[name = tensor<string, []>("op_865_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = var_733_cast_fp16)[name = tensor<string, []>("op_865_cast_fp16")];
+            tensor<int32, [4]> var_872_begin_0 = const()[name = tensor<string, []>("op_872_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_872_end_0 = const()[name = tensor<string, []>("op_872_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_872_end_mask_0 = const()[name = tensor<string, []>("op_872_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = var_872_end_0, end_mask = var_872_end_mask_0, x = var_733_cast_fp16)[name = tensor<string, []>("op_872_cast_fp16")];
+            tensor<int32, [4]> var_879_begin_0 = const()[name = tensor<string, []>("op_879_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_879_end_0 = const()[name = tensor<string, []>("op_879_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_879_end_mask_0 = const()[name = tensor<string, []>("op_879_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_879_cast_fp16 = slice_by_index(begin = var_879_begin_0, end = var_879_end_0, end_mask = var_879_end_mask_0, x = var_733_cast_fp16)[name = tensor<string, []>("op_879_cast_fp16")];
+            tensor<int32, [4]> var_886_begin_0 = const()[name = tensor<string, []>("op_886_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_886_end_0 = const()[name = tensor<string, []>("op_886_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_886_end_mask_0 = const()[name = tensor<string, []>("op_886_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_886_cast_fp16 = slice_by_index(begin = var_886_begin_0, end = var_886_end_0, end_mask = var_886_end_mask_0, x = var_737_cast_fp16)[name = tensor<string, []>("op_886_cast_fp16")];
+            tensor<int32, [4]> var_893_begin_0 = const()[name = tensor<string, []>("op_893_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_893_end_0 = const()[name = tensor<string, []>("op_893_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_893_end_mask_0 = const()[name = tensor<string, []>("op_893_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_893_cast_fp16 = slice_by_index(begin = var_893_begin_0, end = var_893_end_0, end_mask = var_893_end_mask_0, x = var_737_cast_fp16)[name = tensor<string, []>("op_893_cast_fp16")];
+            tensor<int32, [4]> var_900_begin_0 = const()[name = tensor<string, []>("op_900_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_900_end_0 = const()[name = tensor<string, []>("op_900_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_900_end_mask_0 = const()[name = tensor<string, []>("op_900_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_900_cast_fp16 = slice_by_index(begin = var_900_begin_0, end = var_900_end_0, end_mask = var_900_end_mask_0, x = var_737_cast_fp16)[name = tensor<string, []>("op_900_cast_fp16")];
+            tensor<int32, [4]> var_907_begin_0 = const()[name = tensor<string, []>("op_907_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_907_end_0 = const()[name = tensor<string, []>("op_907_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_907_end_mask_0 = const()[name = tensor<string, []>("op_907_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_907_cast_fp16 = slice_by_index(begin = var_907_begin_0, end = var_907_end_0, end_mask = var_907_end_mask_0, x = var_737_cast_fp16)[name = tensor<string, []>("op_907_cast_fp16")];
+            tensor<int32, [4]> k_3_perm_0 = const()[name = tensor<string, []>("k_3_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_912_begin_0 = const()[name = tensor<string, []>("op_912_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_912_end_0 = const()[name = tensor<string, []>("op_912_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_912_end_mask_0 = const()[name = tensor<string, []>("op_912_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 384]> k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_912_cast_fp16 = slice_by_index(begin = var_912_begin_0, end = var_912_end_0, end_mask = var_912_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_912_cast_fp16")];
+            tensor<int32, [4]> var_916_begin_0 = const()[name = tensor<string, []>("op_916_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_916_end_0 = const()[name = tensor<string, []>("op_916_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_916_end_mask_0 = const()[name = tensor<string, []>("op_916_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_916_cast_fp16 = slice_by_index(begin = var_916_begin_0, end = var_916_end_0, end_mask = var_916_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_916_cast_fp16")];
+            tensor<int32, [4]> var_920_begin_0 = const()[name = tensor<string, []>("op_920_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_920_end_0 = const()[name = tensor<string, []>("op_920_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_920_end_mask_0 = const()[name = tensor<string, []>("op_920_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_920_cast_fp16")];
+            tensor<int32, [4]> var_924_begin_0 = const()[name = tensor<string, []>("op_924_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_924_end_0 = const()[name = tensor<string, []>("op_924_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_924_end_mask_0 = const()[name = tensor<string, []>("op_924_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_924_cast_fp16")];
+            tensor<int32, [4]> var_928_begin_0 = const()[name = tensor<string, []>("op_928_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_928_end_0 = const()[name = tensor<string, []>("op_928_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_928_end_mask_0 = const()[name = tensor<string, []>("op_928_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_928_cast_fp16")];
+            tensor<int32, [4]> var_932_begin_0 = const()[name = tensor<string, []>("op_932_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_932_end_0 = const()[name = tensor<string, []>("op_932_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_932_end_mask_0 = const()[name = tensor<string, []>("op_932_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = k_3_cast_fp16)[name = tensor<string, []>("op_932_cast_fp16")];
+            tensor<int32, [4]> var_934_begin_0 = const()[name = tensor<string, []>("op_934_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_934_end_0 = const()[name = tensor<string, []>("op_934_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_934_end_mask_0 = const()[name = tensor<string, []>("op_934_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_934_cast_fp16 = slice_by_index(begin = var_934_begin_0, end = var_934_end_0, end_mask = var_934_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_934_cast_fp16")];
+            tensor<int32, [4]> var_938_begin_0 = const()[name = tensor<string, []>("op_938_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_938_end_0 = const()[name = tensor<string, []>("op_938_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_938_end_mask_0 = const()[name = tensor<string, []>("op_938_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_938_cast_fp16 = slice_by_index(begin = var_938_begin_0, end = var_938_end_0, end_mask = var_938_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_938_cast_fp16")];
+            tensor<int32, [4]> var_942_begin_0 = const()[name = tensor<string, []>("op_942_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_942_end_0 = const()[name = tensor<string, []>("op_942_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_942_end_mask_0 = const()[name = tensor<string, []>("op_942_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_942_cast_fp16 = slice_by_index(begin = var_942_begin_0, end = var_942_end_0, end_mask = var_942_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_942_cast_fp16")];
+            tensor<int32, [4]> var_946_begin_0 = const()[name = tensor<string, []>("op_946_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_946_end_0 = const()[name = tensor<string, []>("op_946_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_946_end_mask_0 = const()[name = tensor<string, []>("op_946_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_946_cast_fp16 = slice_by_index(begin = var_946_begin_0, end = var_946_end_0, end_mask = var_946_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_946_cast_fp16")];
+            tensor<int32, [4]> var_950_begin_0 = const()[name = tensor<string, []>("op_950_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_950_end_0 = const()[name = tensor<string, []>("op_950_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_950_end_mask_0 = const()[name = tensor<string, []>("op_950_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_950_cast_fp16 = slice_by_index(begin = var_950_begin_0, end = var_950_end_0, end_mask = var_950_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_950_cast_fp16")];
+            tensor<int32, [4]> var_954_begin_0 = const()[name = tensor<string, []>("op_954_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_954_end_0 = const()[name = tensor<string, []>("op_954_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_954_end_mask_0 = const()[name = tensor<string, []>("op_954_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_954_cast_fp16 = slice_by_index(begin = var_954_begin_0, end = var_954_end_0, end_mask = var_954_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_954_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_49_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_49_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_49_equation_0, values = (var_912_cast_fp16, var_746_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_49_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_51_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_51_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_51_equation_0, values = (var_912_cast_fp16, var_753_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_51_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_53_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_53_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_53_equation_0, values = (var_912_cast_fp16, var_760_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_53_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_55_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_55_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_55_equation_0, values = (var_912_cast_fp16, var_767_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_55_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_57_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_57_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_57_equation_0, values = (var_916_cast_fp16, var_774_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_57_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_59_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_59_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_59_equation_0, values = (var_916_cast_fp16, var_781_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_59_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_61_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_61_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_61_equation_0, values = (var_916_cast_fp16, var_788_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_61_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_63_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_63_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_63_equation_0, values = (var_916_cast_fp16, var_795_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_63_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_65_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_65_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_65_equation_0, values = (var_920_cast_fp16, var_802_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_65_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_67_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_67_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_67_equation_0, values = (var_920_cast_fp16, var_809_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_67_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_69_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_69_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_69_equation_0, values = (var_920_cast_fp16, var_816_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_69_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_71_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_71_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_71_equation_0, values = (var_920_cast_fp16, var_823_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_71_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_73_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_73_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_73_equation_0, values = (var_924_cast_fp16, var_830_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_73_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_75_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_75_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_75_equation_0, values = (var_924_cast_fp16, var_837_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_75_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_77_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_77_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_77_equation_0, values = (var_924_cast_fp16, var_844_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_77_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_79_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_79_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_79_equation_0, values = (var_924_cast_fp16, var_851_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_79_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_81_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_81_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_81_equation_0, values = (var_928_cast_fp16, var_858_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_81_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_83_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_83_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_83_equation_0, values = (var_928_cast_fp16, var_865_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_83_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_85_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_85_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_85_equation_0, values = (var_928_cast_fp16, var_872_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_85_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_87_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_87_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_87_equation_0, values = (var_928_cast_fp16, var_879_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_87_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_89_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_89_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_89_equation_0, values = (var_932_cast_fp16, var_886_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_89_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_91_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_91_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_91_equation_0, values = (var_932_cast_fp16, var_893_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_91_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_93_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_93_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_93_equation_0, values = (var_932_cast_fp16, var_900_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_93_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_95_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_95_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_95_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_95_equation_0, values = (var_932_cast_fp16, var_907_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_95_cast_fp16")];
+            tensor<fp16, []> var_1005_to_fp16 = const()[name = tensor<string, []>("op_1005_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_49_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_49_cast_fp16, y = var_1005_to_fp16)[name = tensor<string, []>("aw_chunk_49_cast_fp16")];
+            tensor<fp16, []> var_1007_to_fp16 = const()[name = tensor<string, []>("op_1007_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_51_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_51_cast_fp16, y = var_1007_to_fp16)[name = tensor<string, []>("aw_chunk_51_cast_fp16")];
+            tensor<fp16, []> var_1009_to_fp16 = const()[name = tensor<string, []>("op_1009_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_53_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_53_cast_fp16, y = var_1009_to_fp16)[name = tensor<string, []>("aw_chunk_53_cast_fp16")];
+            tensor<fp16, []> var_1011_to_fp16 = const()[name = tensor<string, []>("op_1011_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_55_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_55_cast_fp16, y = var_1011_to_fp16)[name = tensor<string, []>("aw_chunk_55_cast_fp16")];
+            tensor<fp16, []> var_1013_to_fp16 = const()[name = tensor<string, []>("op_1013_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_57_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_57_cast_fp16, y = var_1013_to_fp16)[name = tensor<string, []>("aw_chunk_57_cast_fp16")];
+            tensor<fp16, []> var_1015_to_fp16 = const()[name = tensor<string, []>("op_1015_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_59_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_59_cast_fp16, y = var_1015_to_fp16)[name = tensor<string, []>("aw_chunk_59_cast_fp16")];
+            tensor<fp16, []> var_1017_to_fp16 = const()[name = tensor<string, []>("op_1017_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_61_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_61_cast_fp16, y = var_1017_to_fp16)[name = tensor<string, []>("aw_chunk_61_cast_fp16")];
+            tensor<fp16, []> var_1019_to_fp16 = const()[name = tensor<string, []>("op_1019_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_63_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_63_cast_fp16, y = var_1019_to_fp16)[name = tensor<string, []>("aw_chunk_63_cast_fp16")];
+            tensor<fp16, []> var_1021_to_fp16 = const()[name = tensor<string, []>("op_1021_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_65_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_65_cast_fp16, y = var_1021_to_fp16)[name = tensor<string, []>("aw_chunk_65_cast_fp16")];
+            tensor<fp16, []> var_1023_to_fp16 = const()[name = tensor<string, []>("op_1023_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_67_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_67_cast_fp16, y = var_1023_to_fp16)[name = tensor<string, []>("aw_chunk_67_cast_fp16")];
+            tensor<fp16, []> var_1025_to_fp16 = const()[name = tensor<string, []>("op_1025_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_69_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_69_cast_fp16, y = var_1025_to_fp16)[name = tensor<string, []>("aw_chunk_69_cast_fp16")];
+            tensor<fp16, []> var_1027_to_fp16 = const()[name = tensor<string, []>("op_1027_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_71_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_71_cast_fp16, y = var_1027_to_fp16)[name = tensor<string, []>("aw_chunk_71_cast_fp16")];
+            tensor<fp16, []> var_1029_to_fp16 = const()[name = tensor<string, []>("op_1029_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_73_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_73_cast_fp16, y = var_1029_to_fp16)[name = tensor<string, []>("aw_chunk_73_cast_fp16")];
+            tensor<fp16, []> var_1031_to_fp16 = const()[name = tensor<string, []>("op_1031_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_75_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_75_cast_fp16, y = var_1031_to_fp16)[name = tensor<string, []>("aw_chunk_75_cast_fp16")];
+            tensor<fp16, []> var_1033_to_fp16 = const()[name = tensor<string, []>("op_1033_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_77_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_77_cast_fp16, y = var_1033_to_fp16)[name = tensor<string, []>("aw_chunk_77_cast_fp16")];
+            tensor<fp16, []> var_1035_to_fp16 = const()[name = tensor<string, []>("op_1035_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_79_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_79_cast_fp16, y = var_1035_to_fp16)[name = tensor<string, []>("aw_chunk_79_cast_fp16")];
+            tensor<fp16, []> var_1037_to_fp16 = const()[name = tensor<string, []>("op_1037_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_81_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_81_cast_fp16, y = var_1037_to_fp16)[name = tensor<string, []>("aw_chunk_81_cast_fp16")];
+            tensor<fp16, []> var_1039_to_fp16 = const()[name = tensor<string, []>("op_1039_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_83_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_83_cast_fp16, y = var_1039_to_fp16)[name = tensor<string, []>("aw_chunk_83_cast_fp16")];
+            tensor<fp16, []> var_1041_to_fp16 = const()[name = tensor<string, []>("op_1041_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_85_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_85_cast_fp16, y = var_1041_to_fp16)[name = tensor<string, []>("aw_chunk_85_cast_fp16")];
+            tensor<fp16, []> var_1043_to_fp16 = const()[name = tensor<string, []>("op_1043_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_87_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_87_cast_fp16, y = var_1043_to_fp16)[name = tensor<string, []>("aw_chunk_87_cast_fp16")];
+            tensor<fp16, []> var_1045_to_fp16 = const()[name = tensor<string, []>("op_1045_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_89_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_89_cast_fp16, y = var_1045_to_fp16)[name = tensor<string, []>("aw_chunk_89_cast_fp16")];
+            tensor<fp16, []> var_1047_to_fp16 = const()[name = tensor<string, []>("op_1047_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_91_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_91_cast_fp16, y = var_1047_to_fp16)[name = tensor<string, []>("aw_chunk_91_cast_fp16")];
+            tensor<fp16, []> var_1049_to_fp16 = const()[name = tensor<string, []>("op_1049_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_93_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_93_cast_fp16, y = var_1049_to_fp16)[name = tensor<string, []>("aw_chunk_93_cast_fp16")];
+            tensor<fp16, []> var_1051_to_fp16 = const()[name = tensor<string, []>("op_1051_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_95_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_95_cast_fp16, y = var_1051_to_fp16)[name = tensor<string, []>("aw_chunk_95_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1053_cast_fp16 = softmax(axis = var_662, x = aw_chunk_49_cast_fp16)[name = tensor<string, []>("op_1053_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1054_cast_fp16 = softmax(axis = var_662, x = aw_chunk_51_cast_fp16)[name = tensor<string, []>("op_1054_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1055_cast_fp16 = softmax(axis = var_662, x = aw_chunk_53_cast_fp16)[name = tensor<string, []>("op_1055_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1056_cast_fp16 = softmax(axis = var_662, x = aw_chunk_55_cast_fp16)[name = tensor<string, []>("op_1056_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1057_cast_fp16 = softmax(axis = var_662, x = aw_chunk_57_cast_fp16)[name = tensor<string, []>("op_1057_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1058_cast_fp16 = softmax(axis = var_662, x = aw_chunk_59_cast_fp16)[name = tensor<string, []>("op_1058_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1059_cast_fp16 = softmax(axis = var_662, x = aw_chunk_61_cast_fp16)[name = tensor<string, []>("op_1059_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1060_cast_fp16 = softmax(axis = var_662, x = aw_chunk_63_cast_fp16)[name = tensor<string, []>("op_1060_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1061_cast_fp16 = softmax(axis = var_662, x = aw_chunk_65_cast_fp16)[name = tensor<string, []>("op_1061_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1062_cast_fp16 = softmax(axis = var_662, x = aw_chunk_67_cast_fp16)[name = tensor<string, []>("op_1062_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1063_cast_fp16 = softmax(axis = var_662, x = aw_chunk_69_cast_fp16)[name = tensor<string, []>("op_1063_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1064_cast_fp16 = softmax(axis = var_662, x = aw_chunk_71_cast_fp16)[name = tensor<string, []>("op_1064_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1065_cast_fp16 = softmax(axis = var_662, x = aw_chunk_73_cast_fp16)[name = tensor<string, []>("op_1065_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1066_cast_fp16 = softmax(axis = var_662, x = aw_chunk_75_cast_fp16)[name = tensor<string, []>("op_1066_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1067_cast_fp16 = softmax(axis = var_662, x = aw_chunk_77_cast_fp16)[name = tensor<string, []>("op_1067_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1068_cast_fp16 = softmax(axis = var_662, x = aw_chunk_79_cast_fp16)[name = tensor<string, []>("op_1068_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1069_cast_fp16 = softmax(axis = var_662, x = aw_chunk_81_cast_fp16)[name = tensor<string, []>("op_1069_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1070_cast_fp16 = softmax(axis = var_662, x = aw_chunk_83_cast_fp16)[name = tensor<string, []>("op_1070_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1071_cast_fp16 = softmax(axis = var_662, x = aw_chunk_85_cast_fp16)[name = tensor<string, []>("op_1071_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1072_cast_fp16 = softmax(axis = var_662, x = aw_chunk_87_cast_fp16)[name = tensor<string, []>("op_1072_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1073_cast_fp16 = softmax(axis = var_662, x = aw_chunk_89_cast_fp16)[name = tensor<string, []>("op_1073_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1074_cast_fp16 = softmax(axis = var_662, x = aw_chunk_91_cast_fp16)[name = tensor<string, []>("op_1074_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1075_cast_fp16 = softmax(axis = var_662, x = aw_chunk_93_cast_fp16)[name = tensor<string, []>("op_1075_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1076_cast_fp16 = softmax(axis = var_662, x = aw_chunk_95_cast_fp16)[name = tensor<string, []>("op_1076_cast_fp16")];
+            tensor<string, []> var_1078_equation_0 = const()[name = tensor<string, []>("op_1078_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1078_cast_fp16 = einsum(equation = var_1078_equation_0, values = (var_934_cast_fp16, var_1053_cast_fp16))[name = tensor<string, []>("op_1078_cast_fp16")];
+            tensor<string, []> var_1080_equation_0 = const()[name = tensor<string, []>("op_1080_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1080_cast_fp16 = einsum(equation = var_1080_equation_0, values = (var_934_cast_fp16, var_1054_cast_fp16))[name = tensor<string, []>("op_1080_cast_fp16")];
+            tensor<string, []> var_1082_equation_0 = const()[name = tensor<string, []>("op_1082_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1082_cast_fp16 = einsum(equation = var_1082_equation_0, values = (var_934_cast_fp16, var_1055_cast_fp16))[name = tensor<string, []>("op_1082_cast_fp16")];
+            tensor<string, []> var_1084_equation_0 = const()[name = tensor<string, []>("op_1084_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1084_cast_fp16 = einsum(equation = var_1084_equation_0, values = (var_934_cast_fp16, var_1056_cast_fp16))[name = tensor<string, []>("op_1084_cast_fp16")];
+            tensor<string, []> var_1086_equation_0 = const()[name = tensor<string, []>("op_1086_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1086_cast_fp16 = einsum(equation = var_1086_equation_0, values = (var_938_cast_fp16, var_1057_cast_fp16))[name = tensor<string, []>("op_1086_cast_fp16")];
+            tensor<string, []> var_1088_equation_0 = const()[name = tensor<string, []>("op_1088_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1088_cast_fp16 = einsum(equation = var_1088_equation_0, values = (var_938_cast_fp16, var_1058_cast_fp16))[name = tensor<string, []>("op_1088_cast_fp16")];
+            tensor<string, []> var_1090_equation_0 = const()[name = tensor<string, []>("op_1090_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1090_cast_fp16 = einsum(equation = var_1090_equation_0, values = (var_938_cast_fp16, var_1059_cast_fp16))[name = tensor<string, []>("op_1090_cast_fp16")];
+            tensor<string, []> var_1092_equation_0 = const()[name = tensor<string, []>("op_1092_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1092_cast_fp16 = einsum(equation = var_1092_equation_0, values = (var_938_cast_fp16, var_1060_cast_fp16))[name = tensor<string, []>("op_1092_cast_fp16")];
+            tensor<string, []> var_1094_equation_0 = const()[name = tensor<string, []>("op_1094_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1094_cast_fp16 = einsum(equation = var_1094_equation_0, values = (var_942_cast_fp16, var_1061_cast_fp16))[name = tensor<string, []>("op_1094_cast_fp16")];
+            tensor<string, []> var_1096_equation_0 = const()[name = tensor<string, []>("op_1096_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1096_cast_fp16 = einsum(equation = var_1096_equation_0, values = (var_942_cast_fp16, var_1062_cast_fp16))[name = tensor<string, []>("op_1096_cast_fp16")];
+            tensor<string, []> var_1098_equation_0 = const()[name = tensor<string, []>("op_1098_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1098_cast_fp16 = einsum(equation = var_1098_equation_0, values = (var_942_cast_fp16, var_1063_cast_fp16))[name = tensor<string, []>("op_1098_cast_fp16")];
+            tensor<string, []> var_1100_equation_0 = const()[name = tensor<string, []>("op_1100_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1100_cast_fp16 = einsum(equation = var_1100_equation_0, values = (var_942_cast_fp16, var_1064_cast_fp16))[name = tensor<string, []>("op_1100_cast_fp16")];
+            tensor<string, []> var_1102_equation_0 = const()[name = tensor<string, []>("op_1102_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1102_cast_fp16 = einsum(equation = var_1102_equation_0, values = (var_946_cast_fp16, var_1065_cast_fp16))[name = tensor<string, []>("op_1102_cast_fp16")];
+            tensor<string, []> var_1104_equation_0 = const()[name = tensor<string, []>("op_1104_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1104_cast_fp16 = einsum(equation = var_1104_equation_0, values = (var_946_cast_fp16, var_1066_cast_fp16))[name = tensor<string, []>("op_1104_cast_fp16")];
+            tensor<string, []> var_1106_equation_0 = const()[name = tensor<string, []>("op_1106_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1106_cast_fp16 = einsum(equation = var_1106_equation_0, values = (var_946_cast_fp16, var_1067_cast_fp16))[name = tensor<string, []>("op_1106_cast_fp16")];
+            tensor<string, []> var_1108_equation_0 = const()[name = tensor<string, []>("op_1108_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1108_cast_fp16 = einsum(equation = var_1108_equation_0, values = (var_946_cast_fp16, var_1068_cast_fp16))[name = tensor<string, []>("op_1108_cast_fp16")];
+            tensor<string, []> var_1110_equation_0 = const()[name = tensor<string, []>("op_1110_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1110_cast_fp16 = einsum(equation = var_1110_equation_0, values = (var_950_cast_fp16, var_1069_cast_fp16))[name = tensor<string, []>("op_1110_cast_fp16")];
+            tensor<string, []> var_1112_equation_0 = const()[name = tensor<string, []>("op_1112_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1112_cast_fp16 = einsum(equation = var_1112_equation_0, values = (var_950_cast_fp16, var_1070_cast_fp16))[name = tensor<string, []>("op_1112_cast_fp16")];
+            tensor<string, []> var_1114_equation_0 = const()[name = tensor<string, []>("op_1114_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1114_cast_fp16 = einsum(equation = var_1114_equation_0, values = (var_950_cast_fp16, var_1071_cast_fp16))[name = tensor<string, []>("op_1114_cast_fp16")];
+            tensor<string, []> var_1116_equation_0 = const()[name = tensor<string, []>("op_1116_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1116_cast_fp16 = einsum(equation = var_1116_equation_0, values = (var_950_cast_fp16, var_1072_cast_fp16))[name = tensor<string, []>("op_1116_cast_fp16")];
+            tensor<string, []> var_1118_equation_0 = const()[name = tensor<string, []>("op_1118_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1118_cast_fp16 = einsum(equation = var_1118_equation_0, values = (var_954_cast_fp16, var_1073_cast_fp16))[name = tensor<string, []>("op_1118_cast_fp16")];
+            tensor<string, []> var_1120_equation_0 = const()[name = tensor<string, []>("op_1120_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1120_cast_fp16 = einsum(equation = var_1120_equation_0, values = (var_954_cast_fp16, var_1074_cast_fp16))[name = tensor<string, []>("op_1120_cast_fp16")];
+            tensor<string, []> var_1122_equation_0 = const()[name = tensor<string, []>("op_1122_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1122_cast_fp16 = einsum(equation = var_1122_equation_0, values = (var_954_cast_fp16, var_1075_cast_fp16))[name = tensor<string, []>("op_1122_cast_fp16")];
+            tensor<string, []> var_1124_equation_0 = const()[name = tensor<string, []>("op_1124_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1124_cast_fp16 = einsum(equation = var_1124_equation_0, values = (var_954_cast_fp16, var_1076_cast_fp16))[name = tensor<string, []>("op_1124_cast_fp16")];
+            tensor<bool, []> var_1126_interleave_0 = const()[name = tensor<string, []>("op_1126_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1126_cast_fp16 = concat(axis = var_651, interleave = var_1126_interleave_0, values = (var_1078_cast_fp16, var_1080_cast_fp16, var_1082_cast_fp16, var_1084_cast_fp16))[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<bool, []> var_1128_interleave_0 = const()[name = tensor<string, []>("op_1128_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1128_cast_fp16 = concat(axis = var_651, interleave = var_1128_interleave_0, values = (var_1086_cast_fp16, var_1088_cast_fp16, var_1090_cast_fp16, var_1092_cast_fp16))[name = tensor<string, []>("op_1128_cast_fp16")];
+            tensor<bool, []> var_1130_interleave_0 = const()[name = tensor<string, []>("op_1130_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1130_cast_fp16 = concat(axis = var_651, interleave = var_1130_interleave_0, values = (var_1094_cast_fp16, var_1096_cast_fp16, var_1098_cast_fp16, var_1100_cast_fp16))[name = tensor<string, []>("op_1130_cast_fp16")];
+            tensor<bool, []> var_1132_interleave_0 = const()[name = tensor<string, []>("op_1132_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1132_cast_fp16 = concat(axis = var_651, interleave = var_1132_interleave_0, values = (var_1102_cast_fp16, var_1104_cast_fp16, var_1106_cast_fp16, var_1108_cast_fp16))[name = tensor<string, []>("op_1132_cast_fp16")];
+            tensor<bool, []> var_1134_interleave_0 = const()[name = tensor<string, []>("op_1134_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1134_cast_fp16 = concat(axis = var_651, interleave = var_1134_interleave_0, values = (var_1110_cast_fp16, var_1112_cast_fp16, var_1114_cast_fp16, var_1116_cast_fp16))[name = tensor<string, []>("op_1134_cast_fp16")];
+            tensor<bool, []> var_1136_interleave_0 = const()[name = tensor<string, []>("op_1136_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1136_cast_fp16 = concat(axis = var_651, interleave = var_1136_interleave_0, values = (var_1118_cast_fp16, var_1120_cast_fp16, var_1122_cast_fp16, var_1124_cast_fp16))[name = tensor<string, []>("op_1136_cast_fp16")];
+            tensor<bool, []> input_9_interleave_0 = const()[name = tensor<string, []>("input_9_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_9_cast_fp16 = concat(axis = var_662, interleave = input_9_interleave_0, values = (var_1126_cast_fp16, var_1128_cast_fp16, var_1130_cast_fp16, var_1132_cast_fp16, var_1134_cast_fp16, var_1136_cast_fp16))[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> obj_7_pad_type_0 = const()[name = tensor<string, []>("obj_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_7_strides_0 = const()[name = tensor<string, []>("obj_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_7_pad_0 = const()[name = tensor<string, []>("obj_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_7_dilations_0 = const()[name = tensor<string, []>("obj_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_7_groups_0 = const()[name = tensor<string, []>("obj_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6662016)))];
+            tensor<fp16, [384]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6956992)))];
+            tensor<fp16, [1, 384, 1, 1500]> obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = obj_7_dilations_0, groups = obj_7_groups_0, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = obj_7_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("obj_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> out_7_axes_0 = const()[name = tensor<string, []>("out_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_1155_to_fp16 = const()[name = tensor<string, []>("op_1155_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_1155_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
+            tensor<fp16, [384]> input_11_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_11_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6957824)))];
+            tensor<fp16, [384]> input_11_beta_0_to_fp16 = const()[name = tensor<string, []>("input_11_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6958656)))];
+            tensor<fp16, []> input_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> input_13_pad_type_0 = const()[name = tensor<string, []>("input_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_13_strides_0 = const()[name = tensor<string, []>("input_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_13_pad_0 = const()[name = tensor<string, []>("input_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_13_dilations_0 = const()[name = tensor<string, []>("input_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_13_groups_0 = const()[name = tensor<string, []>("input_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6959488)))];
+            tensor<fp16, [1536]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8139200)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> input_15_mode_0 = const()[name = tensor<string, []>("input_15_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> hidden_states_7_pad_type_0 = const()[name = tensor<string, []>("hidden_states_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = tensor<string, []>("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = tensor<string, []>("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = tensor<string, []>("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> hidden_states_7_groups_0 = const()[name = tensor<string, []>("hidden_states_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8142336)))];
+            tensor<fp16, [384]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9322048)))];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_1184 = const()[name = tensor<string, []>("op_1184"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1195 = const()[name = tensor<string, []>("op_1195"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> out_9_axes_0 = const()[name = tensor<string, []>("out_9_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_1212_to_fp16 = const()[name = tensor<string, []>("op_1212_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_1212_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
+            tensor<fp16, [384]> obj_9_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9322880)))];
+            tensor<fp16, [384]> obj_9_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_9_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9323712)))];
+            tensor<fp16, []> obj_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_9_cast_fp16")];
+            tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_5_strides_0 = const()[name = tensor<string, []>("query_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_5_dilations_0 = const()[name = tensor<string, []>("query_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_5_groups_0 = const()[name = tensor<string, []>("query_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9324544)))];
+            tensor<fp16, [384]> layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9619520)))];
+            tensor<fp16, [1, 384, 1, 1500]> query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = query_5_dilations_0, groups = query_5_groups_0, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = query_5_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
+            tensor<string, []> key_5_pad_type_0 = const()[name = tensor<string, []>("key_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> key_5_strides_0 = const()[name = tensor<string, []>("key_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> key_5_pad_0 = const()[name = tensor<string, []>("key_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> key_5_dilations_0 = const()[name = tensor<string, []>("key_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> key_5_groups_0 = const()[name = tensor<string, []>("key_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9620352)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_5_cast_fp16 = conv(dilations = key_5_dilations_0, groups = key_5_groups_0, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = key_5_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
+            tensor<string, []> value_5_pad_type_0 = const()[name = tensor<string, []>("value_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> value_5_strides_0 = const()[name = tensor<string, []>("value_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> value_5_pad_0 = const()[name = tensor<string, []>("value_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> value_5_dilations_0 = const()[name = tensor<string, []>("value_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> value_5_groups_0 = const()[name = tensor<string, []>("value_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9915328)))];
+            tensor<fp16, [384]> layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10210304)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = value_5_dilations_0, groups = value_5_groups_0, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("value_5_cast_fp16")];
+            tensor<int32, [4]> var_1250_begin_0 = const()[name = tensor<string, []>("op_1250_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1250_end_0 = const()[name = tensor<string, []>("op_1250_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1250_end_mask_0 = const()[name = tensor<string, []>("op_1250_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1250_cast_fp16 = slice_by_index(begin = var_1250_begin_0, end = var_1250_end_0, end_mask = var_1250_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1250_cast_fp16")];
+            tensor<int32, [4]> var_1254_begin_0 = const()[name = tensor<string, []>("op_1254_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_1254_end_0 = const()[name = tensor<string, []>("op_1254_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_1254_end_mask_0 = const()[name = tensor<string, []>("op_1254_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1254_cast_fp16 = slice_by_index(begin = var_1254_begin_0, end = var_1254_end_0, end_mask = var_1254_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1254_cast_fp16")];
+            tensor<int32, [4]> var_1258_begin_0 = const()[name = tensor<string, []>("op_1258_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_1258_end_0 = const()[name = tensor<string, []>("op_1258_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_1258_end_mask_0 = const()[name = tensor<string, []>("op_1258_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1258_cast_fp16 = slice_by_index(begin = var_1258_begin_0, end = var_1258_end_0, end_mask = var_1258_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1258_cast_fp16")];
+            tensor<int32, [4]> var_1262_begin_0 = const()[name = tensor<string, []>("op_1262_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_1262_end_0 = const()[name = tensor<string, []>("op_1262_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_1262_end_mask_0 = const()[name = tensor<string, []>("op_1262_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1262_cast_fp16 = slice_by_index(begin = var_1262_begin_0, end = var_1262_end_0, end_mask = var_1262_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1262_cast_fp16")];
+            tensor<int32, [4]> var_1266_begin_0 = const()[name = tensor<string, []>("op_1266_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_1266_end_0 = const()[name = tensor<string, []>("op_1266_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_1266_end_mask_0 = const()[name = tensor<string, []>("op_1266_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1266_cast_fp16 = slice_by_index(begin = var_1266_begin_0, end = var_1266_end_0, end_mask = var_1266_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1266_cast_fp16")];
+            tensor<int32, [4]> var_1270_begin_0 = const()[name = tensor<string, []>("op_1270_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_1270_end_0 = const()[name = tensor<string, []>("op_1270_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_1270_end_mask_0 = const()[name = tensor<string, []>("op_1270_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1270_cast_fp16 = slice_by_index(begin = var_1270_begin_0, end = var_1270_end_0, end_mask = var_1270_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1270_cast_fp16")];
+            tensor<int32, [4]> var_1279_begin_0 = const()[name = tensor<string, []>("op_1279_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1279_end_0 = const()[name = tensor<string, []>("op_1279_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1279_end_mask_0 = const()[name = tensor<string, []>("op_1279_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1279_cast_fp16 = slice_by_index(begin = var_1279_begin_0, end = var_1279_end_0, end_mask = var_1279_end_mask_0, x = var_1250_cast_fp16)[name = tensor<string, []>("op_1279_cast_fp16")];
+            tensor<int32, [4]> var_1286_begin_0 = const()[name = tensor<string, []>("op_1286_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1286_end_0 = const()[name = tensor<string, []>("op_1286_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1286_end_mask_0 = const()[name = tensor<string, []>("op_1286_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1286_cast_fp16 = slice_by_index(begin = var_1286_begin_0, end = var_1286_end_0, end_mask = var_1286_end_mask_0, x = var_1250_cast_fp16)[name = tensor<string, []>("op_1286_cast_fp16")];
+            tensor<int32, [4]> var_1293_begin_0 = const()[name = tensor<string, []>("op_1293_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1293_end_0 = const()[name = tensor<string, []>("op_1293_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1293_end_mask_0 = const()[name = tensor<string, []>("op_1293_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1293_cast_fp16 = slice_by_index(begin = var_1293_begin_0, end = var_1293_end_0, end_mask = var_1293_end_mask_0, x = var_1250_cast_fp16)[name = tensor<string, []>("op_1293_cast_fp16")];
+            tensor<int32, [4]> var_1300_begin_0 = const()[name = tensor<string, []>("op_1300_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1300_end_0 = const()[name = tensor<string, []>("op_1300_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1300_end_mask_0 = const()[name = tensor<string, []>("op_1300_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = var_1250_cast_fp16)[name = tensor<string, []>("op_1300_cast_fp16")];
+            tensor<int32, [4]> var_1307_begin_0 = const()[name = tensor<string, []>("op_1307_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1307_end_0 = const()[name = tensor<string, []>("op_1307_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1307_end_mask_0 = const()[name = tensor<string, []>("op_1307_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1307_cast_fp16 = slice_by_index(begin = var_1307_begin_0, end = var_1307_end_0, end_mask = var_1307_end_mask_0, x = var_1254_cast_fp16)[name = tensor<string, []>("op_1307_cast_fp16")];
+            tensor<int32, [4]> var_1314_begin_0 = const()[name = tensor<string, []>("op_1314_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1314_end_0 = const()[name = tensor<string, []>("op_1314_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1314_end_mask_0 = const()[name = tensor<string, []>("op_1314_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1314_cast_fp16 = slice_by_index(begin = var_1314_begin_0, end = var_1314_end_0, end_mask = var_1314_end_mask_0, x = var_1254_cast_fp16)[name = tensor<string, []>("op_1314_cast_fp16")];
+            tensor<int32, [4]> var_1321_begin_0 = const()[name = tensor<string, []>("op_1321_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1321_end_0 = const()[name = tensor<string, []>("op_1321_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1321_end_mask_0 = const()[name = tensor<string, []>("op_1321_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1321_cast_fp16 = slice_by_index(begin = var_1321_begin_0, end = var_1321_end_0, end_mask = var_1321_end_mask_0, x = var_1254_cast_fp16)[name = tensor<string, []>("op_1321_cast_fp16")];
+            tensor<int32, [4]> var_1328_begin_0 = const()[name = tensor<string, []>("op_1328_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1328_end_0 = const()[name = tensor<string, []>("op_1328_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1328_end_mask_0 = const()[name = tensor<string, []>("op_1328_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = var_1254_cast_fp16)[name = tensor<string, []>("op_1328_cast_fp16")];
+            tensor<int32, [4]> var_1335_begin_0 = const()[name = tensor<string, []>("op_1335_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1335_end_0 = const()[name = tensor<string, []>("op_1335_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1335_end_mask_0 = const()[name = tensor<string, []>("op_1335_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1335_cast_fp16 = slice_by_index(begin = var_1335_begin_0, end = var_1335_end_0, end_mask = var_1335_end_mask_0, x = var_1258_cast_fp16)[name = tensor<string, []>("op_1335_cast_fp16")];
+            tensor<int32, [4]> var_1342_begin_0 = const()[name = tensor<string, []>("op_1342_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1342_end_0 = const()[name = tensor<string, []>("op_1342_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1342_end_mask_0 = const()[name = tensor<string, []>("op_1342_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1342_cast_fp16 = slice_by_index(begin = var_1342_begin_0, end = var_1342_end_0, end_mask = var_1342_end_mask_0, x = var_1258_cast_fp16)[name = tensor<string, []>("op_1342_cast_fp16")];
+            tensor<int32, [4]> var_1349_begin_0 = const()[name = tensor<string, []>("op_1349_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1349_end_0 = const()[name = tensor<string, []>("op_1349_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1349_end_mask_0 = const()[name = tensor<string, []>("op_1349_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1349_cast_fp16 = slice_by_index(begin = var_1349_begin_0, end = var_1349_end_0, end_mask = var_1349_end_mask_0, x = var_1258_cast_fp16)[name = tensor<string, []>("op_1349_cast_fp16")];
+            tensor<int32, [4]> var_1356_begin_0 = const()[name = tensor<string, []>("op_1356_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1356_end_0 = const()[name = tensor<string, []>("op_1356_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1356_end_mask_0 = const()[name = tensor<string, []>("op_1356_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = var_1258_cast_fp16)[name = tensor<string, []>("op_1356_cast_fp16")];
+            tensor<int32, [4]> var_1363_begin_0 = const()[name = tensor<string, []>("op_1363_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1363_end_0 = const()[name = tensor<string, []>("op_1363_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1363_end_mask_0 = const()[name = tensor<string, []>("op_1363_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1363_cast_fp16 = slice_by_index(begin = var_1363_begin_0, end = var_1363_end_0, end_mask = var_1363_end_mask_0, x = var_1262_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<int32, [4]> var_1370_begin_0 = const()[name = tensor<string, []>("op_1370_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1370_end_0 = const()[name = tensor<string, []>("op_1370_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1370_end_mask_0 = const()[name = tensor<string, []>("op_1370_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1370_cast_fp16 = slice_by_index(begin = var_1370_begin_0, end = var_1370_end_0, end_mask = var_1370_end_mask_0, x = var_1262_cast_fp16)[name = tensor<string, []>("op_1370_cast_fp16")];
+            tensor<int32, [4]> var_1377_begin_0 = const()[name = tensor<string, []>("op_1377_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1377_end_0 = const()[name = tensor<string, []>("op_1377_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1377_end_mask_0 = const()[name = tensor<string, []>("op_1377_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1377_cast_fp16 = slice_by_index(begin = var_1377_begin_0, end = var_1377_end_0, end_mask = var_1377_end_mask_0, x = var_1262_cast_fp16)[name = tensor<string, []>("op_1377_cast_fp16")];
+            tensor<int32, [4]> var_1384_begin_0 = const()[name = tensor<string, []>("op_1384_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1384_end_0 = const()[name = tensor<string, []>("op_1384_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1384_end_mask_0 = const()[name = tensor<string, []>("op_1384_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1384_cast_fp16 = slice_by_index(begin = var_1384_begin_0, end = var_1384_end_0, end_mask = var_1384_end_mask_0, x = var_1262_cast_fp16)[name = tensor<string, []>("op_1384_cast_fp16")];
+            tensor<int32, [4]> var_1391_begin_0 = const()[name = tensor<string, []>("op_1391_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1391_end_0 = const()[name = tensor<string, []>("op_1391_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1391_end_mask_0 = const()[name = tensor<string, []>("op_1391_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1391_cast_fp16 = slice_by_index(begin = var_1391_begin_0, end = var_1391_end_0, end_mask = var_1391_end_mask_0, x = var_1266_cast_fp16)[name = tensor<string, []>("op_1391_cast_fp16")];
+            tensor<int32, [4]> var_1398_begin_0 = const()[name = tensor<string, []>("op_1398_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1398_end_0 = const()[name = tensor<string, []>("op_1398_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1398_end_mask_0 = const()[name = tensor<string, []>("op_1398_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = var_1266_cast_fp16)[name = tensor<string, []>("op_1398_cast_fp16")];
+            tensor<int32, [4]> var_1405_begin_0 = const()[name = tensor<string, []>("op_1405_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1405_end_0 = const()[name = tensor<string, []>("op_1405_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1405_end_mask_0 = const()[name = tensor<string, []>("op_1405_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1405_cast_fp16 = slice_by_index(begin = var_1405_begin_0, end = var_1405_end_0, end_mask = var_1405_end_mask_0, x = var_1266_cast_fp16)[name = tensor<string, []>("op_1405_cast_fp16")];
+            tensor<int32, [4]> var_1412_begin_0 = const()[name = tensor<string, []>("op_1412_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1412_end_0 = const()[name = tensor<string, []>("op_1412_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1412_end_mask_0 = const()[name = tensor<string, []>("op_1412_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1412_cast_fp16 = slice_by_index(begin = var_1412_begin_0, end = var_1412_end_0, end_mask = var_1412_end_mask_0, x = var_1266_cast_fp16)[name = tensor<string, []>("op_1412_cast_fp16")];
+            tensor<int32, [4]> var_1419_begin_0 = const()[name = tensor<string, []>("op_1419_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1419_end_0 = const()[name = tensor<string, []>("op_1419_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1419_end_mask_0 = const()[name = tensor<string, []>("op_1419_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1419_cast_fp16 = slice_by_index(begin = var_1419_begin_0, end = var_1419_end_0, end_mask = var_1419_end_mask_0, x = var_1270_cast_fp16)[name = tensor<string, []>("op_1419_cast_fp16")];
+            tensor<int32, [4]> var_1426_begin_0 = const()[name = tensor<string, []>("op_1426_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1426_end_0 = const()[name = tensor<string, []>("op_1426_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1426_end_mask_0 = const()[name = tensor<string, []>("op_1426_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = var_1270_cast_fp16)[name = tensor<string, []>("op_1426_cast_fp16")];
+            tensor<int32, [4]> var_1433_begin_0 = const()[name = tensor<string, []>("op_1433_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1433_end_0 = const()[name = tensor<string, []>("op_1433_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1433_end_mask_0 = const()[name = tensor<string, []>("op_1433_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1433_cast_fp16 = slice_by_index(begin = var_1433_begin_0, end = var_1433_end_0, end_mask = var_1433_end_mask_0, x = var_1270_cast_fp16)[name = tensor<string, []>("op_1433_cast_fp16")];
+            tensor<int32, [4]> var_1440_begin_0 = const()[name = tensor<string, []>("op_1440_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1440_end_0 = const()[name = tensor<string, []>("op_1440_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1440_end_mask_0 = const()[name = tensor<string, []>("op_1440_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1440_cast_fp16 = slice_by_index(begin = var_1440_begin_0, end = var_1440_end_0, end_mask = var_1440_end_mask_0, x = var_1270_cast_fp16)[name = tensor<string, []>("op_1440_cast_fp16")];
+            tensor<int32, [4]> k_5_perm_0 = const()[name = tensor<string, []>("k_5_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_1445_begin_0 = const()[name = tensor<string, []>("op_1445_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1445_end_0 = const()[name = tensor<string, []>("op_1445_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_1445_end_mask_0 = const()[name = tensor<string, []>("op_1445_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 384]> k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1445_cast_fp16 = slice_by_index(begin = var_1445_begin_0, end = var_1445_end_0, end_mask = var_1445_end_mask_0, x = k_5_cast_fp16)[name = tensor<string, []>("op_1445_cast_fp16")];
+            tensor<int32, [4]> var_1449_begin_0 = const()[name = tensor<string, []>("op_1449_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_1449_end_0 = const()[name = tensor<string, []>("op_1449_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_1449_end_mask_0 = const()[name = tensor<string, []>("op_1449_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1449_cast_fp16 = slice_by_index(begin = var_1449_begin_0, end = var_1449_end_0, end_mask = var_1449_end_mask_0, x = k_5_cast_fp16)[name = tensor<string, []>("op_1449_cast_fp16")];
+            tensor<int32, [4]> var_1453_begin_0 = const()[name = tensor<string, []>("op_1453_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_1453_end_0 = const()[name = tensor<string, []>("op_1453_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_1453_end_mask_0 = const()[name = tensor<string, []>("op_1453_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1453_cast_fp16 = slice_by_index(begin = var_1453_begin_0, end = var_1453_end_0, end_mask = var_1453_end_mask_0, x = k_5_cast_fp16)[name = tensor<string, []>("op_1453_cast_fp16")];
+            tensor<int32, [4]> var_1457_begin_0 = const()[name = tensor<string, []>("op_1457_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_1457_end_0 = const()[name = tensor<string, []>("op_1457_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_1457_end_mask_0 = const()[name = tensor<string, []>("op_1457_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1457_cast_fp16 = slice_by_index(begin = var_1457_begin_0, end = var_1457_end_0, end_mask = var_1457_end_mask_0, x = k_5_cast_fp16)[name = tensor<string, []>("op_1457_cast_fp16")];
+            tensor<int32, [4]> var_1461_begin_0 = const()[name = tensor<string, []>("op_1461_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_1461_end_0 = const()[name = tensor<string, []>("op_1461_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_1461_end_mask_0 = const()[name = tensor<string, []>("op_1461_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1461_cast_fp16 = slice_by_index(begin = var_1461_begin_0, end = var_1461_end_0, end_mask = var_1461_end_mask_0, x = k_5_cast_fp16)[name = tensor<string, []>("op_1461_cast_fp16")];
+            tensor<int32, [4]> var_1465_begin_0 = const()[name = tensor<string, []>("op_1465_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_1465_end_0 = const()[name = tensor<string, []>("op_1465_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_1465_end_mask_0 = const()[name = tensor<string, []>("op_1465_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1465_cast_fp16 = slice_by_index(begin = var_1465_begin_0, end = var_1465_end_0, end_mask = var_1465_end_mask_0, x = k_5_cast_fp16)[name = tensor<string, []>("op_1465_cast_fp16")];
+            tensor<int32, [4]> var_1467_begin_0 = const()[name = tensor<string, []>("op_1467_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1467_end_0 = const()[name = tensor<string, []>("op_1467_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1467_end_mask_0 = const()[name = tensor<string, []>("op_1467_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1467_cast_fp16 = slice_by_index(begin = var_1467_begin_0, end = var_1467_end_0, end_mask = var_1467_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1467_cast_fp16")];
+            tensor<int32, [4]> var_1471_begin_0 = const()[name = tensor<string, []>("op_1471_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_1471_end_0 = const()[name = tensor<string, []>("op_1471_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_1471_end_mask_0 = const()[name = tensor<string, []>("op_1471_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1471_cast_fp16 = slice_by_index(begin = var_1471_begin_0, end = var_1471_end_0, end_mask = var_1471_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1471_cast_fp16")];
+            tensor<int32, [4]> var_1475_begin_0 = const()[name = tensor<string, []>("op_1475_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_1475_end_0 = const()[name = tensor<string, []>("op_1475_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_1475_end_mask_0 = const()[name = tensor<string, []>("op_1475_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1475_cast_fp16 = slice_by_index(begin = var_1475_begin_0, end = var_1475_end_0, end_mask = var_1475_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1475_cast_fp16")];
+            tensor<int32, [4]> var_1479_begin_0 = const()[name = tensor<string, []>("op_1479_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_1479_end_0 = const()[name = tensor<string, []>("op_1479_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_1479_end_mask_0 = const()[name = tensor<string, []>("op_1479_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1479_cast_fp16 = slice_by_index(begin = var_1479_begin_0, end = var_1479_end_0, end_mask = var_1479_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1479_cast_fp16")];
+            tensor<int32, [4]> var_1483_begin_0 = const()[name = tensor<string, []>("op_1483_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_1483_end_0 = const()[name = tensor<string, []>("op_1483_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_1483_end_mask_0 = const()[name = tensor<string, []>("op_1483_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1483_cast_fp16 = slice_by_index(begin = var_1483_begin_0, end = var_1483_end_0, end_mask = var_1483_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1483_cast_fp16")];
+            tensor<int32, [4]> var_1487_begin_0 = const()[name = tensor<string, []>("op_1487_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_1487_end_0 = const()[name = tensor<string, []>("op_1487_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_1487_end_mask_0 = const()[name = tensor<string, []>("op_1487_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1487_cast_fp16 = slice_by_index(begin = var_1487_begin_0, end = var_1487_end_0, end_mask = var_1487_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1487_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_97_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_97_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_97_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_97_equation_0, values = (var_1445_cast_fp16, var_1279_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_97_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_99_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_99_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_99_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_99_equation_0, values = (var_1445_cast_fp16, var_1286_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_99_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_101_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_101_equation_0, values = (var_1445_cast_fp16, var_1293_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_101_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_103_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_103_equation_0, values = (var_1445_cast_fp16, var_1300_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_103_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_105_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_105_equation_0, values = (var_1449_cast_fp16, var_1307_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_105_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_107_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_107_equation_0, values = (var_1449_cast_fp16, var_1314_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_107_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_109_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_109_equation_0, values = (var_1449_cast_fp16, var_1321_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_109_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_111_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_111_equation_0, values = (var_1449_cast_fp16, var_1328_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_111_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_113_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_113_equation_0, values = (var_1453_cast_fp16, var_1335_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_113_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_115_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_115_equation_0, values = (var_1453_cast_fp16, var_1342_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_115_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_117_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_117_equation_0, values = (var_1453_cast_fp16, var_1349_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_117_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_119_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_119_equation_0, values = (var_1453_cast_fp16, var_1356_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_119_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_121_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_121_equation_0, values = (var_1457_cast_fp16, var_1363_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_121_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_123_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_123_equation_0, values = (var_1457_cast_fp16, var_1370_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_123_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_125_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_125_equation_0, values = (var_1457_cast_fp16, var_1377_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_125_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_127_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_127_equation_0, values = (var_1457_cast_fp16, var_1384_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_127_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_129_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_129_equation_0, values = (var_1461_cast_fp16, var_1391_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_129_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_131_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_131_equation_0, values = (var_1461_cast_fp16, var_1398_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_131_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_133_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_133_equation_0, values = (var_1461_cast_fp16, var_1405_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_133_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_135_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_135_equation_0, values = (var_1461_cast_fp16, var_1412_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_135_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_137_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_137_equation_0, values = (var_1465_cast_fp16, var_1419_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_137_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_139_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_139_equation_0, values = (var_1465_cast_fp16, var_1426_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_139_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_141_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_141_equation_0, values = (var_1465_cast_fp16, var_1433_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_141_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_143_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_143_equation_0, values = (var_1465_cast_fp16, var_1440_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_143_cast_fp16")];
+            tensor<fp16, []> var_1538_to_fp16 = const()[name = tensor<string, []>("op_1538_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_97_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_97_cast_fp16, y = var_1538_to_fp16)[name = tensor<string, []>("aw_chunk_97_cast_fp16")];
+            tensor<fp16, []> var_1540_to_fp16 = const()[name = tensor<string, []>("op_1540_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_99_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_99_cast_fp16, y = var_1540_to_fp16)[name = tensor<string, []>("aw_chunk_99_cast_fp16")];
+            tensor<fp16, []> var_1542_to_fp16 = const()[name = tensor<string, []>("op_1542_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_101_cast_fp16, y = var_1542_to_fp16)[name = tensor<string, []>("aw_chunk_101_cast_fp16")];
+            tensor<fp16, []> var_1544_to_fp16 = const()[name = tensor<string, []>("op_1544_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_103_cast_fp16, y = var_1544_to_fp16)[name = tensor<string, []>("aw_chunk_103_cast_fp16")];
+            tensor<fp16, []> var_1546_to_fp16 = const()[name = tensor<string, []>("op_1546_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_105_cast_fp16, y = var_1546_to_fp16)[name = tensor<string, []>("aw_chunk_105_cast_fp16")];
+            tensor<fp16, []> var_1548_to_fp16 = const()[name = tensor<string, []>("op_1548_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_107_cast_fp16, y = var_1548_to_fp16)[name = tensor<string, []>("aw_chunk_107_cast_fp16")];
+            tensor<fp16, []> var_1550_to_fp16 = const()[name = tensor<string, []>("op_1550_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_109_cast_fp16, y = var_1550_to_fp16)[name = tensor<string, []>("aw_chunk_109_cast_fp16")];
+            tensor<fp16, []> var_1552_to_fp16 = const()[name = tensor<string, []>("op_1552_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_111_cast_fp16, y = var_1552_to_fp16)[name = tensor<string, []>("aw_chunk_111_cast_fp16")];
+            tensor<fp16, []> var_1554_to_fp16 = const()[name = tensor<string, []>("op_1554_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_113_cast_fp16, y = var_1554_to_fp16)[name = tensor<string, []>("aw_chunk_113_cast_fp16")];
+            tensor<fp16, []> var_1556_to_fp16 = const()[name = tensor<string, []>("op_1556_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_115_cast_fp16, y = var_1556_to_fp16)[name = tensor<string, []>("aw_chunk_115_cast_fp16")];
+            tensor<fp16, []> var_1558_to_fp16 = const()[name = tensor<string, []>("op_1558_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_117_cast_fp16, y = var_1558_to_fp16)[name = tensor<string, []>("aw_chunk_117_cast_fp16")];
+            tensor<fp16, []> var_1560_to_fp16 = const()[name = tensor<string, []>("op_1560_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_119_cast_fp16, y = var_1560_to_fp16)[name = tensor<string, []>("aw_chunk_119_cast_fp16")];
+            tensor<fp16, []> var_1562_to_fp16 = const()[name = tensor<string, []>("op_1562_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_121_cast_fp16, y = var_1562_to_fp16)[name = tensor<string, []>("aw_chunk_121_cast_fp16")];
+            tensor<fp16, []> var_1564_to_fp16 = const()[name = tensor<string, []>("op_1564_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_123_cast_fp16, y = var_1564_to_fp16)[name = tensor<string, []>("aw_chunk_123_cast_fp16")];
+            tensor<fp16, []> var_1566_to_fp16 = const()[name = tensor<string, []>("op_1566_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_125_cast_fp16, y = var_1566_to_fp16)[name = tensor<string, []>("aw_chunk_125_cast_fp16")];
+            tensor<fp16, []> var_1568_to_fp16 = const()[name = tensor<string, []>("op_1568_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_127_cast_fp16, y = var_1568_to_fp16)[name = tensor<string, []>("aw_chunk_127_cast_fp16")];
+            tensor<fp16, []> var_1570_to_fp16 = const()[name = tensor<string, []>("op_1570_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_129_cast_fp16, y = var_1570_to_fp16)[name = tensor<string, []>("aw_chunk_129_cast_fp16")];
+            tensor<fp16, []> var_1572_to_fp16 = const()[name = tensor<string, []>("op_1572_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_131_cast_fp16, y = var_1572_to_fp16)[name = tensor<string, []>("aw_chunk_131_cast_fp16")];
+            tensor<fp16, []> var_1574_to_fp16 = const()[name = tensor<string, []>("op_1574_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_133_cast_fp16, y = var_1574_to_fp16)[name = tensor<string, []>("aw_chunk_133_cast_fp16")];
+            tensor<fp16, []> var_1576_to_fp16 = const()[name = tensor<string, []>("op_1576_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_135_cast_fp16, y = var_1576_to_fp16)[name = tensor<string, []>("aw_chunk_135_cast_fp16")];
+            tensor<fp16, []> var_1578_to_fp16 = const()[name = tensor<string, []>("op_1578_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_137_cast_fp16, y = var_1578_to_fp16)[name = tensor<string, []>("aw_chunk_137_cast_fp16")];
+            tensor<fp16, []> var_1580_to_fp16 = const()[name = tensor<string, []>("op_1580_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_139_cast_fp16, y = var_1580_to_fp16)[name = tensor<string, []>("aw_chunk_139_cast_fp16")];
+            tensor<fp16, []> var_1582_to_fp16 = const()[name = tensor<string, []>("op_1582_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_141_cast_fp16, y = var_1582_to_fp16)[name = tensor<string, []>("aw_chunk_141_cast_fp16")];
+            tensor<fp16, []> var_1584_to_fp16 = const()[name = tensor<string, []>("op_1584_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_143_cast_fp16, y = var_1584_to_fp16)[name = tensor<string, []>("aw_chunk_143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1586_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_97_cast_fp16)[name = tensor<string, []>("op_1586_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1587_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_99_cast_fp16)[name = tensor<string, []>("op_1587_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1588_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_101_cast_fp16)[name = tensor<string, []>("op_1588_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1589_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_103_cast_fp16)[name = tensor<string, []>("op_1589_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1590_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_105_cast_fp16)[name = tensor<string, []>("op_1590_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1591_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_107_cast_fp16)[name = tensor<string, []>("op_1591_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1592_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_109_cast_fp16)[name = tensor<string, []>("op_1592_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1593_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_111_cast_fp16)[name = tensor<string, []>("op_1593_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1594_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_113_cast_fp16)[name = tensor<string, []>("op_1594_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1595_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_115_cast_fp16)[name = tensor<string, []>("op_1595_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1596_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_117_cast_fp16)[name = tensor<string, []>("op_1596_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1597_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_119_cast_fp16)[name = tensor<string, []>("op_1597_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1598_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_121_cast_fp16)[name = tensor<string, []>("op_1598_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1599_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_123_cast_fp16)[name = tensor<string, []>("op_1599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1600_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_125_cast_fp16)[name = tensor<string, []>("op_1600_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1601_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_127_cast_fp16)[name = tensor<string, []>("op_1601_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1602_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_129_cast_fp16)[name = tensor<string, []>("op_1602_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1603_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_131_cast_fp16)[name = tensor<string, []>("op_1603_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1604_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_133_cast_fp16)[name = tensor<string, []>("op_1604_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1605_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_135_cast_fp16)[name = tensor<string, []>("op_1605_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1606_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_137_cast_fp16)[name = tensor<string, []>("op_1606_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1607_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_139_cast_fp16)[name = tensor<string, []>("op_1607_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1608_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_141_cast_fp16)[name = tensor<string, []>("op_1608_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1609_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_143_cast_fp16)[name = tensor<string, []>("op_1609_cast_fp16")];
+            tensor<string, []> var_1611_equation_0 = const()[name = tensor<string, []>("op_1611_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1611_cast_fp16 = einsum(equation = var_1611_equation_0, values = (var_1467_cast_fp16, var_1586_cast_fp16))[name = tensor<string, []>("op_1611_cast_fp16")];
+            tensor<string, []> var_1613_equation_0 = const()[name = tensor<string, []>("op_1613_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1613_cast_fp16 = einsum(equation = var_1613_equation_0, values = (var_1467_cast_fp16, var_1587_cast_fp16))[name = tensor<string, []>("op_1613_cast_fp16")];
+            tensor<string, []> var_1615_equation_0 = const()[name = tensor<string, []>("op_1615_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1615_cast_fp16 = einsum(equation = var_1615_equation_0, values = (var_1467_cast_fp16, var_1588_cast_fp16))[name = tensor<string, []>("op_1615_cast_fp16")];
+            tensor<string, []> var_1617_equation_0 = const()[name = tensor<string, []>("op_1617_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1617_cast_fp16 = einsum(equation = var_1617_equation_0, values = (var_1467_cast_fp16, var_1589_cast_fp16))[name = tensor<string, []>("op_1617_cast_fp16")];
+            tensor<string, []> var_1619_equation_0 = const()[name = tensor<string, []>("op_1619_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1619_cast_fp16 = einsum(equation = var_1619_equation_0, values = (var_1471_cast_fp16, var_1590_cast_fp16))[name = tensor<string, []>("op_1619_cast_fp16")];
+            tensor<string, []> var_1621_equation_0 = const()[name = tensor<string, []>("op_1621_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1621_cast_fp16 = einsum(equation = var_1621_equation_0, values = (var_1471_cast_fp16, var_1591_cast_fp16))[name = tensor<string, []>("op_1621_cast_fp16")];
+            tensor<string, []> var_1623_equation_0 = const()[name = tensor<string, []>("op_1623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1623_cast_fp16 = einsum(equation = var_1623_equation_0, values = (var_1471_cast_fp16, var_1592_cast_fp16))[name = tensor<string, []>("op_1623_cast_fp16")];
+            tensor<string, []> var_1625_equation_0 = const()[name = tensor<string, []>("op_1625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1625_cast_fp16 = einsum(equation = var_1625_equation_0, values = (var_1471_cast_fp16, var_1593_cast_fp16))[name = tensor<string, []>("op_1625_cast_fp16")];
+            tensor<string, []> var_1627_equation_0 = const()[name = tensor<string, []>("op_1627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1627_cast_fp16 = einsum(equation = var_1627_equation_0, values = (var_1475_cast_fp16, var_1594_cast_fp16))[name = tensor<string, []>("op_1627_cast_fp16")];
+            tensor<string, []> var_1629_equation_0 = const()[name = tensor<string, []>("op_1629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1629_cast_fp16 = einsum(equation = var_1629_equation_0, values = (var_1475_cast_fp16, var_1595_cast_fp16))[name = tensor<string, []>("op_1629_cast_fp16")];
+            tensor<string, []> var_1631_equation_0 = const()[name = tensor<string, []>("op_1631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1631_cast_fp16 = einsum(equation = var_1631_equation_0, values = (var_1475_cast_fp16, var_1596_cast_fp16))[name = tensor<string, []>("op_1631_cast_fp16")];
+            tensor<string, []> var_1633_equation_0 = const()[name = tensor<string, []>("op_1633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1633_cast_fp16 = einsum(equation = var_1633_equation_0, values = (var_1475_cast_fp16, var_1597_cast_fp16))[name = tensor<string, []>("op_1633_cast_fp16")];
+            tensor<string, []> var_1635_equation_0 = const()[name = tensor<string, []>("op_1635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1635_cast_fp16 = einsum(equation = var_1635_equation_0, values = (var_1479_cast_fp16, var_1598_cast_fp16))[name = tensor<string, []>("op_1635_cast_fp16")];
+            tensor<string, []> var_1637_equation_0 = const()[name = tensor<string, []>("op_1637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1637_cast_fp16 = einsum(equation = var_1637_equation_0, values = (var_1479_cast_fp16, var_1599_cast_fp16))[name = tensor<string, []>("op_1637_cast_fp16")];
+            tensor<string, []> var_1639_equation_0 = const()[name = tensor<string, []>("op_1639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1639_cast_fp16 = einsum(equation = var_1639_equation_0, values = (var_1479_cast_fp16, var_1600_cast_fp16))[name = tensor<string, []>("op_1639_cast_fp16")];
+            tensor<string, []> var_1641_equation_0 = const()[name = tensor<string, []>("op_1641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1641_cast_fp16 = einsum(equation = var_1641_equation_0, values = (var_1479_cast_fp16, var_1601_cast_fp16))[name = tensor<string, []>("op_1641_cast_fp16")];
+            tensor<string, []> var_1643_equation_0 = const()[name = tensor<string, []>("op_1643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1643_cast_fp16 = einsum(equation = var_1643_equation_0, values = (var_1483_cast_fp16, var_1602_cast_fp16))[name = tensor<string, []>("op_1643_cast_fp16")];
+            tensor<string, []> var_1645_equation_0 = const()[name = tensor<string, []>("op_1645_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1645_cast_fp16 = einsum(equation = var_1645_equation_0, values = (var_1483_cast_fp16, var_1603_cast_fp16))[name = tensor<string, []>("op_1645_cast_fp16")];
+            tensor<string, []> var_1647_equation_0 = const()[name = tensor<string, []>("op_1647_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1647_cast_fp16 = einsum(equation = var_1647_equation_0, values = (var_1483_cast_fp16, var_1604_cast_fp16))[name = tensor<string, []>("op_1647_cast_fp16")];
+            tensor<string, []> var_1649_equation_0 = const()[name = tensor<string, []>("op_1649_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1649_cast_fp16 = einsum(equation = var_1649_equation_0, values = (var_1483_cast_fp16, var_1605_cast_fp16))[name = tensor<string, []>("op_1649_cast_fp16")];
+            tensor<string, []> var_1651_equation_0 = const()[name = tensor<string, []>("op_1651_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1651_cast_fp16 = einsum(equation = var_1651_equation_0, values = (var_1487_cast_fp16, var_1606_cast_fp16))[name = tensor<string, []>("op_1651_cast_fp16")];
+            tensor<string, []> var_1653_equation_0 = const()[name = tensor<string, []>("op_1653_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1653_cast_fp16 = einsum(equation = var_1653_equation_0, values = (var_1487_cast_fp16, var_1607_cast_fp16))[name = tensor<string, []>("op_1653_cast_fp16")];
+            tensor<string, []> var_1655_equation_0 = const()[name = tensor<string, []>("op_1655_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1655_cast_fp16 = einsum(equation = var_1655_equation_0, values = (var_1487_cast_fp16, var_1608_cast_fp16))[name = tensor<string, []>("op_1655_cast_fp16")];
+            tensor<string, []> var_1657_equation_0 = const()[name = tensor<string, []>("op_1657_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1657_cast_fp16 = einsum(equation = var_1657_equation_0, values = (var_1487_cast_fp16, var_1609_cast_fp16))[name = tensor<string, []>("op_1657_cast_fp16")];
+            tensor<bool, []> var_1659_interleave_0 = const()[name = tensor<string, []>("op_1659_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1659_cast_fp16 = concat(axis = var_1184, interleave = var_1659_interleave_0, values = (var_1611_cast_fp16, var_1613_cast_fp16, var_1615_cast_fp16, var_1617_cast_fp16))[name = tensor<string, []>("op_1659_cast_fp16")];
+            tensor<bool, []> var_1661_interleave_0 = const()[name = tensor<string, []>("op_1661_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1661_cast_fp16 = concat(axis = var_1184, interleave = var_1661_interleave_0, values = (var_1619_cast_fp16, var_1621_cast_fp16, var_1623_cast_fp16, var_1625_cast_fp16))[name = tensor<string, []>("op_1661_cast_fp16")];
+            tensor<bool, []> var_1663_interleave_0 = const()[name = tensor<string, []>("op_1663_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1663_cast_fp16 = concat(axis = var_1184, interleave = var_1663_interleave_0, values = (var_1627_cast_fp16, var_1629_cast_fp16, var_1631_cast_fp16, var_1633_cast_fp16))[name = tensor<string, []>("op_1663_cast_fp16")];
+            tensor<bool, []> var_1665_interleave_0 = const()[name = tensor<string, []>("op_1665_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1665_cast_fp16 = concat(axis = var_1184, interleave = var_1665_interleave_0, values = (var_1635_cast_fp16, var_1637_cast_fp16, var_1639_cast_fp16, var_1641_cast_fp16))[name = tensor<string, []>("op_1665_cast_fp16")];
+            tensor<bool, []> var_1667_interleave_0 = const()[name = tensor<string, []>("op_1667_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1667_cast_fp16 = concat(axis = var_1184, interleave = var_1667_interleave_0, values = (var_1643_cast_fp16, var_1645_cast_fp16, var_1647_cast_fp16, var_1649_cast_fp16))[name = tensor<string, []>("op_1667_cast_fp16")];
+            tensor<bool, []> var_1669_interleave_0 = const()[name = tensor<string, []>("op_1669_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1669_cast_fp16 = concat(axis = var_1184, interleave = var_1669_interleave_0, values = (var_1651_cast_fp16, var_1653_cast_fp16, var_1655_cast_fp16, var_1657_cast_fp16))[name = tensor<string, []>("op_1669_cast_fp16")];
+            tensor<bool, []> input_17_interleave_0 = const()[name = tensor<string, []>("input_17_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_17_cast_fp16 = concat(axis = var_1195, interleave = input_17_interleave_0, values = (var_1659_cast_fp16, var_1661_cast_fp16, var_1663_cast_fp16, var_1665_cast_fp16, var_1667_cast_fp16, var_1669_cast_fp16))[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> obj_11_pad_type_0 = const()[name = tensor<string, []>("obj_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_11_strides_0 = const()[name = tensor<string, []>("obj_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_11_pad_0 = const()[name = tensor<string, []>("obj_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_11_dilations_0 = const()[name = tensor<string, []>("obj_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_11_groups_0 = const()[name = tensor<string, []>("obj_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10211136)))];
+            tensor<fp16, [384]> layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10506112)))];
+            tensor<fp16, [1, 384, 1, 1500]> obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("obj_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> out_11_axes_0 = const()[name = tensor<string, []>("out_11_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_1688_to_fp16 = const()[name = tensor<string, []>("op_1688_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_1688_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
+            tensor<fp16, [384]> input_19_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_19_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10506944)))];
+            tensor<fp16, [384]> input_19_beta_0_to_fp16 = const()[name = tensor<string, []>("input_19_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10507776)))];
+            tensor<fp16, []> input_19_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_19_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_pad_type_0 = const()[name = tensor<string, []>("input_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_21_strides_0 = const()[name = tensor<string, []>("input_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_21_pad_0 = const()[name = tensor<string, []>("input_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_21_dilations_0 = const()[name = tensor<string, []>("input_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_21_groups_0 = const()[name = tensor<string, []>("input_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> layers_2_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10508608)))];
+            tensor<fp16, [1536]> layers_2_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11688320)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> input_23_mode_0 = const()[name = tensor<string, []>("input_23_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> hidden_states_9_pad_type_0 = const()[name = tensor<string, []>("hidden_states_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> hidden_states_9_strides_0 = const()[name = tensor<string, []>("hidden_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> hidden_states_9_pad_0 = const()[name = tensor<string, []>("hidden_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> hidden_states_9_dilations_0 = const()[name = tensor<string, []>("hidden_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> hidden_states_9_groups_0 = const()[name = tensor<string, []>("hidden_states_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> layers_2_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11691456)))];
+            tensor<fp16, [384]> layers_2_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12871168)))];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_1717 = const()[name = tensor<string, []>("op_1717"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1728 = const()[name = tensor<string, []>("op_1728"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> out_13_axes_0 = const()[name = tensor<string, []>("out_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_1745_to_fp16 = const()[name = tensor<string, []>("op_1745_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_1745_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("out_13_cast_fp16")];
+            tensor<fp16, [384]> obj_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12872000)))];
+            tensor<fp16, [384]> obj_13_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_13_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12872832)))];
+            tensor<fp16, []> obj_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
+            tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_strides_0 = const()[name = tensor<string, []>("query_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_dilations_0 = const()[name = tensor<string, []>("query_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_groups_0 = const()[name = tensor<string, []>("query_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12873664)))];
+            tensor<fp16, [384]> layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13168640)))];
+            tensor<fp16, [1, 384, 1, 1500]> query_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = query_dilations_0, groups = query_groups_0, pad = query_pad_0, pad_type = query_pad_type_0, strides = query_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
+            tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> key_strides_0 = const()[name = tensor<string, []>("key_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> key_pad_0 = const()[name = tensor<string, []>("key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> key_dilations_0 = const()[name = tensor<string, []>("key_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> key_groups_0 = const()[name = tensor<string, []>("key_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13169472)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_cast_fp16 = conv(dilations = key_dilations_0, groups = key_groups_0, pad = key_pad_0, pad_type = key_pad_type_0, strides = key_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("key_cast_fp16")];
+            tensor<string, []> value_pad_type_0 = const()[name = tensor<string, []>("value_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> value_strides_0 = const()[name = tensor<string, []>("value_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> value_pad_0 = const()[name = tensor<string, []>("value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> value_dilations_0 = const()[name = tensor<string, []>("value_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> value_groups_0 = const()[name = tensor<string, []>("value_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13464448)))];
+            tensor<fp16, [384]> layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13759424)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = value_dilations_0, groups = value_groups_0, pad = value_pad_0, pad_type = value_pad_type_0, strides = value_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("value_cast_fp16")];
+            tensor<int32, [4]> var_1783_begin_0 = const()[name = tensor<string, []>("op_1783_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1783_end_0 = const()[name = tensor<string, []>("op_1783_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1783_end_mask_0 = const()[name = tensor<string, []>("op_1783_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1783_cast_fp16 = slice_by_index(begin = var_1783_begin_0, end = var_1783_end_0, end_mask = var_1783_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1783_cast_fp16")];
+            tensor<int32, [4]> var_1787_begin_0 = const()[name = tensor<string, []>("op_1787_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_1787_end_0 = const()[name = tensor<string, []>("op_1787_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_1787_end_mask_0 = const()[name = tensor<string, []>("op_1787_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1787_cast_fp16 = slice_by_index(begin = var_1787_begin_0, end = var_1787_end_0, end_mask = var_1787_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1787_cast_fp16")];
+            tensor<int32, [4]> var_1791_begin_0 = const()[name = tensor<string, []>("op_1791_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_1791_end_0 = const()[name = tensor<string, []>("op_1791_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_1791_end_mask_0 = const()[name = tensor<string, []>("op_1791_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1791_cast_fp16 = slice_by_index(begin = var_1791_begin_0, end = var_1791_end_0, end_mask = var_1791_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1791_cast_fp16")];
+            tensor<int32, [4]> var_1795_begin_0 = const()[name = tensor<string, []>("op_1795_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_1795_end_0 = const()[name = tensor<string, []>("op_1795_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_1795_end_mask_0 = const()[name = tensor<string, []>("op_1795_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1795_cast_fp16 = slice_by_index(begin = var_1795_begin_0, end = var_1795_end_0, end_mask = var_1795_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1795_cast_fp16")];
+            tensor<int32, [4]> var_1799_begin_0 = const()[name = tensor<string, []>("op_1799_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_1799_end_0 = const()[name = tensor<string, []>("op_1799_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_1799_end_mask_0 = const()[name = tensor<string, []>("op_1799_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1799_cast_fp16 = slice_by_index(begin = var_1799_begin_0, end = var_1799_end_0, end_mask = var_1799_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1799_cast_fp16")];
+            tensor<int32, [4]> var_1803_begin_0 = const()[name = tensor<string, []>("op_1803_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_1803_end_0 = const()[name = tensor<string, []>("op_1803_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_1803_end_mask_0 = const()[name = tensor<string, []>("op_1803_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16 = slice_by_index(begin = var_1803_begin_0, end = var_1803_end_0, end_mask = var_1803_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1803_cast_fp16")];
+            tensor<int32, [4]> var_1812_begin_0 = const()[name = tensor<string, []>("op_1812_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1812_end_0 = const()[name = tensor<string, []>("op_1812_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1812_end_mask_0 = const()[name = tensor<string, []>("op_1812_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1812_cast_fp16 = slice_by_index(begin = var_1812_begin_0, end = var_1812_end_0, end_mask = var_1812_end_mask_0, x = var_1783_cast_fp16)[name = tensor<string, []>("op_1812_cast_fp16")];
+            tensor<int32, [4]> var_1819_begin_0 = const()[name = tensor<string, []>("op_1819_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1819_end_0 = const()[name = tensor<string, []>("op_1819_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1819_end_mask_0 = const()[name = tensor<string, []>("op_1819_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1819_cast_fp16 = slice_by_index(begin = var_1819_begin_0, end = var_1819_end_0, end_mask = var_1819_end_mask_0, x = var_1783_cast_fp16)[name = tensor<string, []>("op_1819_cast_fp16")];
+            tensor<int32, [4]> var_1826_begin_0 = const()[name = tensor<string, []>("op_1826_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1826_end_0 = const()[name = tensor<string, []>("op_1826_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1826_end_mask_0 = const()[name = tensor<string, []>("op_1826_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1826_cast_fp16 = slice_by_index(begin = var_1826_begin_0, end = var_1826_end_0, end_mask = var_1826_end_mask_0, x = var_1783_cast_fp16)[name = tensor<string, []>("op_1826_cast_fp16")];
+            tensor<int32, [4]> var_1833_begin_0 = const()[name = tensor<string, []>("op_1833_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1833_end_0 = const()[name = tensor<string, []>("op_1833_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1833_end_mask_0 = const()[name = tensor<string, []>("op_1833_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1833_cast_fp16 = slice_by_index(begin = var_1833_begin_0, end = var_1833_end_0, end_mask = var_1833_end_mask_0, x = var_1783_cast_fp16)[name = tensor<string, []>("op_1833_cast_fp16")];
+            tensor<int32, [4]> var_1840_begin_0 = const()[name = tensor<string, []>("op_1840_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1840_end_0 = const()[name = tensor<string, []>("op_1840_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1840_end_mask_0 = const()[name = tensor<string, []>("op_1840_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1840_cast_fp16 = slice_by_index(begin = var_1840_begin_0, end = var_1840_end_0, end_mask = var_1840_end_mask_0, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1840_cast_fp16")];
+            tensor<int32, [4]> var_1847_begin_0 = const()[name = tensor<string, []>("op_1847_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1847_end_0 = const()[name = tensor<string, []>("op_1847_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1847_end_mask_0 = const()[name = tensor<string, []>("op_1847_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1847_cast_fp16 = slice_by_index(begin = var_1847_begin_0, end = var_1847_end_0, end_mask = var_1847_end_mask_0, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1847_cast_fp16")];
+            tensor<int32, [4]> var_1854_begin_0 = const()[name = tensor<string, []>("op_1854_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1854_end_0 = const()[name = tensor<string, []>("op_1854_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1854_end_mask_0 = const()[name = tensor<string, []>("op_1854_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1854_cast_fp16 = slice_by_index(begin = var_1854_begin_0, end = var_1854_end_0, end_mask = var_1854_end_mask_0, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1854_cast_fp16")];
+            tensor<int32, [4]> var_1861_begin_0 = const()[name = tensor<string, []>("op_1861_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1861_end_0 = const()[name = tensor<string, []>("op_1861_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1861_end_mask_0 = const()[name = tensor<string, []>("op_1861_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1861_cast_fp16 = slice_by_index(begin = var_1861_begin_0, end = var_1861_end_0, end_mask = var_1861_end_mask_0, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1861_cast_fp16")];
+            tensor<int32, [4]> var_1868_begin_0 = const()[name = tensor<string, []>("op_1868_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1868_end_0 = const()[name = tensor<string, []>("op_1868_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1868_end_mask_0 = const()[name = tensor<string, []>("op_1868_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1868_cast_fp16 = slice_by_index(begin = var_1868_begin_0, end = var_1868_end_0, end_mask = var_1868_end_mask_0, x = var_1791_cast_fp16)[name = tensor<string, []>("op_1868_cast_fp16")];
+            tensor<int32, [4]> var_1875_begin_0 = const()[name = tensor<string, []>("op_1875_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1875_end_0 = const()[name = tensor<string, []>("op_1875_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1875_end_mask_0 = const()[name = tensor<string, []>("op_1875_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1875_cast_fp16 = slice_by_index(begin = var_1875_begin_0, end = var_1875_end_0, end_mask = var_1875_end_mask_0, x = var_1791_cast_fp16)[name = tensor<string, []>("op_1875_cast_fp16")];
+            tensor<int32, [4]> var_1882_begin_0 = const()[name = tensor<string, []>("op_1882_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1882_end_0 = const()[name = tensor<string, []>("op_1882_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1882_end_mask_0 = const()[name = tensor<string, []>("op_1882_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1882_cast_fp16 = slice_by_index(begin = var_1882_begin_0, end = var_1882_end_0, end_mask = var_1882_end_mask_0, x = var_1791_cast_fp16)[name = tensor<string, []>("op_1882_cast_fp16")];
+            tensor<int32, [4]> var_1889_begin_0 = const()[name = tensor<string, []>("op_1889_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1889_end_0 = const()[name = tensor<string, []>("op_1889_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1889_end_mask_0 = const()[name = tensor<string, []>("op_1889_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1889_cast_fp16 = slice_by_index(begin = var_1889_begin_0, end = var_1889_end_0, end_mask = var_1889_end_mask_0, x = var_1791_cast_fp16)[name = tensor<string, []>("op_1889_cast_fp16")];
+            tensor<int32, [4]> var_1896_begin_0 = const()[name = tensor<string, []>("op_1896_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1896_end_0 = const()[name = tensor<string, []>("op_1896_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1896_end_mask_0 = const()[name = tensor<string, []>("op_1896_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1896_cast_fp16 = slice_by_index(begin = var_1896_begin_0, end = var_1896_end_0, end_mask = var_1896_end_mask_0, x = var_1795_cast_fp16)[name = tensor<string, []>("op_1896_cast_fp16")];
+            tensor<int32, [4]> var_1903_begin_0 = const()[name = tensor<string, []>("op_1903_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1903_end_0 = const()[name = tensor<string, []>("op_1903_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1903_end_mask_0 = const()[name = tensor<string, []>("op_1903_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1903_cast_fp16 = slice_by_index(begin = var_1903_begin_0, end = var_1903_end_0, end_mask = var_1903_end_mask_0, x = var_1795_cast_fp16)[name = tensor<string, []>("op_1903_cast_fp16")];
+            tensor<int32, [4]> var_1910_begin_0 = const()[name = tensor<string, []>("op_1910_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1910_end_0 = const()[name = tensor<string, []>("op_1910_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1910_end_mask_0 = const()[name = tensor<string, []>("op_1910_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1910_cast_fp16 = slice_by_index(begin = var_1910_begin_0, end = var_1910_end_0, end_mask = var_1910_end_mask_0, x = var_1795_cast_fp16)[name = tensor<string, []>("op_1910_cast_fp16")];
+            tensor<int32, [4]> var_1917_begin_0 = const()[name = tensor<string, []>("op_1917_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1917_end_0 = const()[name = tensor<string, []>("op_1917_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1917_end_mask_0 = const()[name = tensor<string, []>("op_1917_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1917_cast_fp16 = slice_by_index(begin = var_1917_begin_0, end = var_1917_end_0, end_mask = var_1917_end_mask_0, x = var_1795_cast_fp16)[name = tensor<string, []>("op_1917_cast_fp16")];
+            tensor<int32, [4]> var_1924_begin_0 = const()[name = tensor<string, []>("op_1924_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1924_end_0 = const()[name = tensor<string, []>("op_1924_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1924_end_mask_0 = const()[name = tensor<string, []>("op_1924_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = var_1924_end_0, end_mask = var_1924_end_mask_0, x = var_1799_cast_fp16)[name = tensor<string, []>("op_1924_cast_fp16")];
+            tensor<int32, [4]> var_1931_begin_0 = const()[name = tensor<string, []>("op_1931_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1931_end_0 = const()[name = tensor<string, []>("op_1931_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1931_end_mask_0 = const()[name = tensor<string, []>("op_1931_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1931_cast_fp16 = slice_by_index(begin = var_1931_begin_0, end = var_1931_end_0, end_mask = var_1931_end_mask_0, x = var_1799_cast_fp16)[name = tensor<string, []>("op_1931_cast_fp16")];
+            tensor<int32, [4]> var_1938_begin_0 = const()[name = tensor<string, []>("op_1938_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1938_end_0 = const()[name = tensor<string, []>("op_1938_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1938_end_mask_0 = const()[name = tensor<string, []>("op_1938_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1938_cast_fp16 = slice_by_index(begin = var_1938_begin_0, end = var_1938_end_0, end_mask = var_1938_end_mask_0, x = var_1799_cast_fp16)[name = tensor<string, []>("op_1938_cast_fp16")];
+            tensor<int32, [4]> var_1945_begin_0 = const()[name = tensor<string, []>("op_1945_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1945_end_0 = const()[name = tensor<string, []>("op_1945_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1945_end_mask_0 = const()[name = tensor<string, []>("op_1945_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1945_cast_fp16 = slice_by_index(begin = var_1945_begin_0, end = var_1945_end_0, end_mask = var_1945_end_mask_0, x = var_1799_cast_fp16)[name = tensor<string, []>("op_1945_cast_fp16")];
+            tensor<int32, [4]> var_1952_begin_0 = const()[name = tensor<string, []>("op_1952_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1952_end_0 = const()[name = tensor<string, []>("op_1952_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1952_end_mask_0 = const()[name = tensor<string, []>("op_1952_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1952_cast_fp16 = slice_by_index(begin = var_1952_begin_0, end = var_1952_end_0, end_mask = var_1952_end_mask_0, x = var_1803_cast_fp16)[name = tensor<string, []>("op_1952_cast_fp16")];
+            tensor<int32, [4]> var_1959_begin_0 = const()[name = tensor<string, []>("op_1959_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1959_end_0 = const()[name = tensor<string, []>("op_1959_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1959_end_mask_0 = const()[name = tensor<string, []>("op_1959_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1959_cast_fp16 = slice_by_index(begin = var_1959_begin_0, end = var_1959_end_0, end_mask = var_1959_end_mask_0, x = var_1803_cast_fp16)[name = tensor<string, []>("op_1959_cast_fp16")];
+            tensor<int32, [4]> var_1966_begin_0 = const()[name = tensor<string, []>("op_1966_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1966_end_0 = const()[name = tensor<string, []>("op_1966_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1966_end_mask_0 = const()[name = tensor<string, []>("op_1966_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1966_cast_fp16 = slice_by_index(begin = var_1966_begin_0, end = var_1966_end_0, end_mask = var_1966_end_mask_0, x = var_1803_cast_fp16)[name = tensor<string, []>("op_1966_cast_fp16")];
+            tensor<int32, [4]> var_1973_begin_0 = const()[name = tensor<string, []>("op_1973_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1973_end_0 = const()[name = tensor<string, []>("op_1973_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1973_end_mask_0 = const()[name = tensor<string, []>("op_1973_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1973_cast_fp16 = slice_by_index(begin = var_1973_begin_0, end = var_1973_end_0, end_mask = var_1973_end_mask_0, x = var_1803_cast_fp16)[name = tensor<string, []>("op_1973_cast_fp16")];
+            tensor<int32, [4]> k_perm_0 = const()[name = tensor<string, []>("k_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_1978_begin_0 = const()[name = tensor<string, []>("op_1978_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1978_end_0 = const()[name = tensor<string, []>("op_1978_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_1978_end_mask_0 = const()[name = tensor<string, []>("op_1978_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 384]> k_cast_fp16 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1978_cast_fp16 = slice_by_index(begin = var_1978_begin_0, end = var_1978_end_0, end_mask = var_1978_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1978_cast_fp16")];
+            tensor<int32, [4]> var_1982_begin_0 = const()[name = tensor<string, []>("op_1982_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_1982_end_0 = const()[name = tensor<string, []>("op_1982_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_1982_end_mask_0 = const()[name = tensor<string, []>("op_1982_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1982_cast_fp16 = slice_by_index(begin = var_1982_begin_0, end = var_1982_end_0, end_mask = var_1982_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1982_cast_fp16")];
+            tensor<int32, [4]> var_1986_begin_0 = const()[name = tensor<string, []>("op_1986_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_1986_end_0 = const()[name = tensor<string, []>("op_1986_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_1986_end_mask_0 = const()[name = tensor<string, []>("op_1986_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1986_cast_fp16 = slice_by_index(begin = var_1986_begin_0, end = var_1986_end_0, end_mask = var_1986_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1986_cast_fp16")];
+            tensor<int32, [4]> var_1990_begin_0 = const()[name = tensor<string, []>("op_1990_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_1990_end_0 = const()[name = tensor<string, []>("op_1990_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_1990_end_mask_0 = const()[name = tensor<string, []>("op_1990_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1990_cast_fp16 = slice_by_index(begin = var_1990_begin_0, end = var_1990_end_0, end_mask = var_1990_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1990_cast_fp16")];
+            tensor<int32, [4]> var_1994_begin_0 = const()[name = tensor<string, []>("op_1994_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_1994_end_0 = const()[name = tensor<string, []>("op_1994_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_1994_end_mask_0 = const()[name = tensor<string, []>("op_1994_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1994_cast_fp16 = slice_by_index(begin = var_1994_begin_0, end = var_1994_end_0, end_mask = var_1994_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1994_cast_fp16")];
+            tensor<int32, [4]> var_1998_begin_0 = const()[name = tensor<string, []>("op_1998_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_1998_end_0 = const()[name = tensor<string, []>("op_1998_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_1998_end_mask_0 = const()[name = tensor<string, []>("op_1998_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1998_cast_fp16 = slice_by_index(begin = var_1998_begin_0, end = var_1998_end_0, end_mask = var_1998_end_mask_0, x = k_cast_fp16)[name = tensor<string, []>("op_1998_cast_fp16")];
+            tensor<int32, [4]> var_2000_begin_0 = const()[name = tensor<string, []>("op_2000_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2000_end_0 = const()[name = tensor<string, []>("op_2000_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2000_end_mask_0 = const()[name = tensor<string, []>("op_2000_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2000_cast_fp16 = slice_by_index(begin = var_2000_begin_0, end = var_2000_end_0, end_mask = var_2000_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2000_cast_fp16")];
+            tensor<int32, [4]> var_2004_begin_0 = const()[name = tensor<string, []>("op_2004_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_2004_end_0 = const()[name = tensor<string, []>("op_2004_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_2004_end_mask_0 = const()[name = tensor<string, []>("op_2004_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2004_cast_fp16")];
+            tensor<int32, [4]> var_2008_begin_0 = const()[name = tensor<string, []>("op_2008_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_2008_end_0 = const()[name = tensor<string, []>("op_2008_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_2008_end_mask_0 = const()[name = tensor<string, []>("op_2008_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16 = slice_by_index(begin = var_2008_begin_0, end = var_2008_end_0, end_mask = var_2008_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2008_cast_fp16")];
+            tensor<int32, [4]> var_2012_begin_0 = const()[name = tensor<string, []>("op_2012_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_2012_end_0 = const()[name = tensor<string, []>("op_2012_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_2012_end_mask_0 = const()[name = tensor<string, []>("op_2012_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2012_cast_fp16 = slice_by_index(begin = var_2012_begin_0, end = var_2012_end_0, end_mask = var_2012_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2012_cast_fp16")];
+            tensor<int32, [4]> var_2016_begin_0 = const()[name = tensor<string, []>("op_2016_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_2016_end_0 = const()[name = tensor<string, []>("op_2016_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_2016_end_mask_0 = const()[name = tensor<string, []>("op_2016_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2016_cast_fp16 = slice_by_index(begin = var_2016_begin_0, end = var_2016_end_0, end_mask = var_2016_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2016_cast_fp16")];
+            tensor<int32, [4]> var_2020_begin_0 = const()[name = tensor<string, []>("op_2020_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_2020_end_0 = const()[name = tensor<string, []>("op_2020_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_2020_end_mask_0 = const()[name = tensor<string, []>("op_2020_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2020_cast_fp16 = slice_by_index(begin = var_2020_begin_0, end = var_2020_end_0, end_mask = var_2020_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2020_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_145_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_145_equation_0, values = (var_1978_cast_fp16, var_1812_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_145_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_147_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_147_equation_0, values = (var_1978_cast_fp16, var_1819_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_147_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_149_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_149_equation_0, values = (var_1978_cast_fp16, var_1826_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_149_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_151_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_151_equation_0, values = (var_1978_cast_fp16, var_1833_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_151_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_153_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_153_equation_0, values = (var_1982_cast_fp16, var_1840_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_153_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_155_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_155_equation_0, values = (var_1982_cast_fp16, var_1847_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_155_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_157_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_157_equation_0, values = (var_1982_cast_fp16, var_1854_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_157_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_159_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_159_equation_0, values = (var_1982_cast_fp16, var_1861_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_159_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_161_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_161_equation_0, values = (var_1986_cast_fp16, var_1868_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_161_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_163_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_163_equation_0, values = (var_1986_cast_fp16, var_1875_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_163_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_165_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_165_equation_0, values = (var_1986_cast_fp16, var_1882_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_165_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_167_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_167_equation_0, values = (var_1986_cast_fp16, var_1889_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_167_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_169_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_169_equation_0, values = (var_1990_cast_fp16, var_1896_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_169_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_171_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_171_equation_0, values = (var_1990_cast_fp16, var_1903_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_171_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_173_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_173_equation_0, values = (var_1990_cast_fp16, var_1910_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_173_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_175_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_175_equation_0, values = (var_1990_cast_fp16, var_1917_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_175_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_177_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_177_equation_0, values = (var_1994_cast_fp16, var_1924_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_177_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_179_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_179_equation_0, values = (var_1994_cast_fp16, var_1931_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_179_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_181_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_181_equation_0, values = (var_1994_cast_fp16, var_1938_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_181_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_183_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_183_equation_0, values = (var_1994_cast_fp16, var_1945_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_183_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_185_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_185_equation_0, values = (var_1998_cast_fp16, var_1952_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_185_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_187_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_187_equation_0, values = (var_1998_cast_fp16, var_1959_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_187_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_189_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_189_equation_0, values = (var_1998_cast_fp16, var_1966_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_189_cast_fp16")];
+            tensor<string, []> _SplitHeadsQ__mh_w_equation_0 = const()[name = tensor<string, []>("_SplitHeadsQ__mh_w_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> _SplitHeadsQ__mh_w_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_equation_0, values = (var_1998_cast_fp16, var_1973_cast_fp16))[name = tensor<string, []>("_SplitHeadsQ__mh_w_cast_fp16")];
+            tensor<fp16, []> var_2071_to_fp16 = const()[name = tensor<string, []>("op_2071_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_145_cast_fp16, y = var_2071_to_fp16)[name = tensor<string, []>("aw_chunk_145_cast_fp16")];
+            tensor<fp16, []> var_2073_to_fp16 = const()[name = tensor<string, []>("op_2073_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_147_cast_fp16, y = var_2073_to_fp16)[name = tensor<string, []>("aw_chunk_147_cast_fp16")];
+            tensor<fp16, []> var_2075_to_fp16 = const()[name = tensor<string, []>("op_2075_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_149_cast_fp16, y = var_2075_to_fp16)[name = tensor<string, []>("aw_chunk_149_cast_fp16")];
+            tensor<fp16, []> var_2077_to_fp16 = const()[name = tensor<string, []>("op_2077_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_151_cast_fp16, y = var_2077_to_fp16)[name = tensor<string, []>("aw_chunk_151_cast_fp16")];
+            tensor<fp16, []> var_2079_to_fp16 = const()[name = tensor<string, []>("op_2079_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_153_cast_fp16, y = var_2079_to_fp16)[name = tensor<string, []>("aw_chunk_153_cast_fp16")];
+            tensor<fp16, []> var_2081_to_fp16 = const()[name = tensor<string, []>("op_2081_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_155_cast_fp16, y = var_2081_to_fp16)[name = tensor<string, []>("aw_chunk_155_cast_fp16")];
+            tensor<fp16, []> var_2083_to_fp16 = const()[name = tensor<string, []>("op_2083_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_157_cast_fp16, y = var_2083_to_fp16)[name = tensor<string, []>("aw_chunk_157_cast_fp16")];
+            tensor<fp16, []> var_2085_to_fp16 = const()[name = tensor<string, []>("op_2085_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_159_cast_fp16, y = var_2085_to_fp16)[name = tensor<string, []>("aw_chunk_159_cast_fp16")];
+            tensor<fp16, []> var_2087_to_fp16 = const()[name = tensor<string, []>("op_2087_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_161_cast_fp16, y = var_2087_to_fp16)[name = tensor<string, []>("aw_chunk_161_cast_fp16")];
+            tensor<fp16, []> var_2089_to_fp16 = const()[name = tensor<string, []>("op_2089_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_163_cast_fp16, y = var_2089_to_fp16)[name = tensor<string, []>("aw_chunk_163_cast_fp16")];
+            tensor<fp16, []> var_2091_to_fp16 = const()[name = tensor<string, []>("op_2091_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_165_cast_fp16, y = var_2091_to_fp16)[name = tensor<string, []>("aw_chunk_165_cast_fp16")];
+            tensor<fp16, []> var_2093_to_fp16 = const()[name = tensor<string, []>("op_2093_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_167_cast_fp16, y = var_2093_to_fp16)[name = tensor<string, []>("aw_chunk_167_cast_fp16")];
+            tensor<fp16, []> var_2095_to_fp16 = const()[name = tensor<string, []>("op_2095_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_169_cast_fp16, y = var_2095_to_fp16)[name = tensor<string, []>("aw_chunk_169_cast_fp16")];
+            tensor<fp16, []> var_2097_to_fp16 = const()[name = tensor<string, []>("op_2097_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_171_cast_fp16, y = var_2097_to_fp16)[name = tensor<string, []>("aw_chunk_171_cast_fp16")];
+            tensor<fp16, []> var_2099_to_fp16 = const()[name = tensor<string, []>("op_2099_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_173_cast_fp16, y = var_2099_to_fp16)[name = tensor<string, []>("aw_chunk_173_cast_fp16")];
+            tensor<fp16, []> var_2101_to_fp16 = const()[name = tensor<string, []>("op_2101_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_175_cast_fp16, y = var_2101_to_fp16)[name = tensor<string, []>("aw_chunk_175_cast_fp16")];
+            tensor<fp16, []> var_2103_to_fp16 = const()[name = tensor<string, []>("op_2103_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_177_cast_fp16, y = var_2103_to_fp16)[name = tensor<string, []>("aw_chunk_177_cast_fp16")];
+            tensor<fp16, []> var_2105_to_fp16 = const()[name = tensor<string, []>("op_2105_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_179_cast_fp16, y = var_2105_to_fp16)[name = tensor<string, []>("aw_chunk_179_cast_fp16")];
+            tensor<fp16, []> var_2107_to_fp16 = const()[name = tensor<string, []>("op_2107_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_181_cast_fp16, y = var_2107_to_fp16)[name = tensor<string, []>("aw_chunk_181_cast_fp16")];
+            tensor<fp16, []> var_2109_to_fp16 = const()[name = tensor<string, []>("op_2109_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_183_cast_fp16, y = var_2109_to_fp16)[name = tensor<string, []>("aw_chunk_183_cast_fp16")];
+            tensor<fp16, []> var_2111_to_fp16 = const()[name = tensor<string, []>("op_2111_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_185_cast_fp16, y = var_2111_to_fp16)[name = tensor<string, []>("aw_chunk_185_cast_fp16")];
+            tensor<fp16, []> var_2113_to_fp16 = const()[name = tensor<string, []>("op_2113_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_187_cast_fp16, y = var_2113_to_fp16)[name = tensor<string, []>("aw_chunk_187_cast_fp16")];
+            tensor<fp16, []> var_2115_to_fp16 = const()[name = tensor<string, []>("op_2115_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_189_cast_fp16, y = var_2115_to_fp16)[name = tensor<string, []>("aw_chunk_189_cast_fp16")];
+            tensor<fp16, []> var_2117_to_fp16 = const()[name = tensor<string, []>("op_2117_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_cast_fp16, y = var_2117_to_fp16)[name = tensor<string, []>("aw_chunk_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2119_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_145_cast_fp16)[name = tensor<string, []>("op_2119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2120_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_147_cast_fp16)[name = tensor<string, []>("op_2120_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2121_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_149_cast_fp16)[name = tensor<string, []>("op_2121_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2122_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_151_cast_fp16)[name = tensor<string, []>("op_2122_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2123_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_153_cast_fp16)[name = tensor<string, []>("op_2123_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2124_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_155_cast_fp16)[name = tensor<string, []>("op_2124_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2125_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_157_cast_fp16)[name = tensor<string, []>("op_2125_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2126_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_159_cast_fp16)[name = tensor<string, []>("op_2126_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2127_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_161_cast_fp16)[name = tensor<string, []>("op_2127_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2128_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_163_cast_fp16)[name = tensor<string, []>("op_2128_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2129_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_165_cast_fp16)[name = tensor<string, []>("op_2129_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2130_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_167_cast_fp16)[name = tensor<string, []>("op_2130_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2131_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_169_cast_fp16)[name = tensor<string, []>("op_2131_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2132_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_171_cast_fp16)[name = tensor<string, []>("op_2132_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2133_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_173_cast_fp16)[name = tensor<string, []>("op_2133_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2134_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_175_cast_fp16)[name = tensor<string, []>("op_2134_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2135_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_177_cast_fp16)[name = tensor<string, []>("op_2135_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2136_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_179_cast_fp16)[name = tensor<string, []>("op_2136_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2137_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_181_cast_fp16)[name = tensor<string, []>("op_2137_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2138_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_183_cast_fp16)[name = tensor<string, []>("op_2138_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2139_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_185_cast_fp16)[name = tensor<string, []>("op_2139_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2140_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_187_cast_fp16)[name = tensor<string, []>("op_2140_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2141_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_189_cast_fp16)[name = tensor<string, []>("op_2141_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2142_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_cast_fp16)[name = tensor<string, []>("op_2142_cast_fp16")];
+            tensor<string, []> var_2144_equation_0 = const()[name = tensor<string, []>("op_2144_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2144_cast_fp16 = einsum(equation = var_2144_equation_0, values = (var_2000_cast_fp16, var_2119_cast_fp16))[name = tensor<string, []>("op_2144_cast_fp16")];
+            tensor<string, []> var_2146_equation_0 = const()[name = tensor<string, []>("op_2146_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2146_cast_fp16 = einsum(equation = var_2146_equation_0, values = (var_2000_cast_fp16, var_2120_cast_fp16))[name = tensor<string, []>("op_2146_cast_fp16")];
+            tensor<string, []> var_2148_equation_0 = const()[name = tensor<string, []>("op_2148_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2148_cast_fp16 = einsum(equation = var_2148_equation_0, values = (var_2000_cast_fp16, var_2121_cast_fp16))[name = tensor<string, []>("op_2148_cast_fp16")];
+            tensor<string, []> var_2150_equation_0 = const()[name = tensor<string, []>("op_2150_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2150_cast_fp16 = einsum(equation = var_2150_equation_0, values = (var_2000_cast_fp16, var_2122_cast_fp16))[name = tensor<string, []>("op_2150_cast_fp16")];
+            tensor<string, []> var_2152_equation_0 = const()[name = tensor<string, []>("op_2152_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2152_cast_fp16 = einsum(equation = var_2152_equation_0, values = (var_2004_cast_fp16, var_2123_cast_fp16))[name = tensor<string, []>("op_2152_cast_fp16")];
+            tensor<string, []> var_2154_equation_0 = const()[name = tensor<string, []>("op_2154_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2154_cast_fp16 = einsum(equation = var_2154_equation_0, values = (var_2004_cast_fp16, var_2124_cast_fp16))[name = tensor<string, []>("op_2154_cast_fp16")];
+            tensor<string, []> var_2156_equation_0 = const()[name = tensor<string, []>("op_2156_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2156_cast_fp16 = einsum(equation = var_2156_equation_0, values = (var_2004_cast_fp16, var_2125_cast_fp16))[name = tensor<string, []>("op_2156_cast_fp16")];
+            tensor<string, []> var_2158_equation_0 = const()[name = tensor<string, []>("op_2158_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2158_cast_fp16 = einsum(equation = var_2158_equation_0, values = (var_2004_cast_fp16, var_2126_cast_fp16))[name = tensor<string, []>("op_2158_cast_fp16")];
+            tensor<string, []> var_2160_equation_0 = const()[name = tensor<string, []>("op_2160_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2160_cast_fp16 = einsum(equation = var_2160_equation_0, values = (var_2008_cast_fp16, var_2127_cast_fp16))[name = tensor<string, []>("op_2160_cast_fp16")];
+            tensor<string, []> var_2162_equation_0 = const()[name = tensor<string, []>("op_2162_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2162_cast_fp16 = einsum(equation = var_2162_equation_0, values = (var_2008_cast_fp16, var_2128_cast_fp16))[name = tensor<string, []>("op_2162_cast_fp16")];
+            tensor<string, []> var_2164_equation_0 = const()[name = tensor<string, []>("op_2164_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2164_cast_fp16 = einsum(equation = var_2164_equation_0, values = (var_2008_cast_fp16, var_2129_cast_fp16))[name = tensor<string, []>("op_2164_cast_fp16")];
+            tensor<string, []> var_2166_equation_0 = const()[name = tensor<string, []>("op_2166_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2166_cast_fp16 = einsum(equation = var_2166_equation_0, values = (var_2008_cast_fp16, var_2130_cast_fp16))[name = tensor<string, []>("op_2166_cast_fp16")];
+            tensor<string, []> var_2168_equation_0 = const()[name = tensor<string, []>("op_2168_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2168_cast_fp16 = einsum(equation = var_2168_equation_0, values = (var_2012_cast_fp16, var_2131_cast_fp16))[name = tensor<string, []>("op_2168_cast_fp16")];
+            tensor<string, []> var_2170_equation_0 = const()[name = tensor<string, []>("op_2170_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2170_cast_fp16 = einsum(equation = var_2170_equation_0, values = (var_2012_cast_fp16, var_2132_cast_fp16))[name = tensor<string, []>("op_2170_cast_fp16")];
+            tensor<string, []> var_2172_equation_0 = const()[name = tensor<string, []>("op_2172_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2172_cast_fp16 = einsum(equation = var_2172_equation_0, values = (var_2012_cast_fp16, var_2133_cast_fp16))[name = tensor<string, []>("op_2172_cast_fp16")];
+            tensor<string, []> var_2174_equation_0 = const()[name = tensor<string, []>("op_2174_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2174_cast_fp16 = einsum(equation = var_2174_equation_0, values = (var_2012_cast_fp16, var_2134_cast_fp16))[name = tensor<string, []>("op_2174_cast_fp16")];
+            tensor<string, []> var_2176_equation_0 = const()[name = tensor<string, []>("op_2176_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2176_cast_fp16 = einsum(equation = var_2176_equation_0, values = (var_2016_cast_fp16, var_2135_cast_fp16))[name = tensor<string, []>("op_2176_cast_fp16")];
+            tensor<string, []> var_2178_equation_0 = const()[name = tensor<string, []>("op_2178_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2178_cast_fp16 = einsum(equation = var_2178_equation_0, values = (var_2016_cast_fp16, var_2136_cast_fp16))[name = tensor<string, []>("op_2178_cast_fp16")];
+            tensor<string, []> var_2180_equation_0 = const()[name = tensor<string, []>("op_2180_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2180_cast_fp16 = einsum(equation = var_2180_equation_0, values = (var_2016_cast_fp16, var_2137_cast_fp16))[name = tensor<string, []>("op_2180_cast_fp16")];
+            tensor<string, []> var_2182_equation_0 = const()[name = tensor<string, []>("op_2182_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2182_cast_fp16 = einsum(equation = var_2182_equation_0, values = (var_2016_cast_fp16, var_2138_cast_fp16))[name = tensor<string, []>("op_2182_cast_fp16")];
+            tensor<string, []> var_2184_equation_0 = const()[name = tensor<string, []>("op_2184_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2184_cast_fp16 = einsum(equation = var_2184_equation_0, values = (var_2020_cast_fp16, var_2139_cast_fp16))[name = tensor<string, []>("op_2184_cast_fp16")];
+            tensor<string, []> var_2186_equation_0 = const()[name = tensor<string, []>("op_2186_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2186_cast_fp16 = einsum(equation = var_2186_equation_0, values = (var_2020_cast_fp16, var_2140_cast_fp16))[name = tensor<string, []>("op_2186_cast_fp16")];
+            tensor<string, []> var_2188_equation_0 = const()[name = tensor<string, []>("op_2188_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2188_cast_fp16 = einsum(equation = var_2188_equation_0, values = (var_2020_cast_fp16, var_2141_cast_fp16))[name = tensor<string, []>("op_2188_cast_fp16")];
+            tensor<string, []> var_2190_equation_0 = const()[name = tensor<string, []>("op_2190_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2190_cast_fp16 = einsum(equation = var_2190_equation_0, values = (var_2020_cast_fp16, var_2142_cast_fp16))[name = tensor<string, []>("op_2190_cast_fp16")];
+            tensor<bool, []> var_2192_interleave_0 = const()[name = tensor<string, []>("op_2192_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2192_cast_fp16 = concat(axis = var_1717, interleave = var_2192_interleave_0, values = (var_2144_cast_fp16, var_2146_cast_fp16, var_2148_cast_fp16, var_2150_cast_fp16))[name = tensor<string, []>("op_2192_cast_fp16")];
+            tensor<bool, []> var_2194_interleave_0 = const()[name = tensor<string, []>("op_2194_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2194_cast_fp16 = concat(axis = var_1717, interleave = var_2194_interleave_0, values = (var_2152_cast_fp16, var_2154_cast_fp16, var_2156_cast_fp16, var_2158_cast_fp16))[name = tensor<string, []>("op_2194_cast_fp16")];
+            tensor<bool, []> var_2196_interleave_0 = const()[name = tensor<string, []>("op_2196_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2196_cast_fp16 = concat(axis = var_1717, interleave = var_2196_interleave_0, values = (var_2160_cast_fp16, var_2162_cast_fp16, var_2164_cast_fp16, var_2166_cast_fp16))[name = tensor<string, []>("op_2196_cast_fp16")];
+            tensor<bool, []> var_2198_interleave_0 = const()[name = tensor<string, []>("op_2198_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2198_cast_fp16 = concat(axis = var_1717, interleave = var_2198_interleave_0, values = (var_2168_cast_fp16, var_2170_cast_fp16, var_2172_cast_fp16, var_2174_cast_fp16))[name = tensor<string, []>("op_2198_cast_fp16")];
+            tensor<bool, []> var_2200_interleave_0 = const()[name = tensor<string, []>("op_2200_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2200_cast_fp16 = concat(axis = var_1717, interleave = var_2200_interleave_0, values = (var_2176_cast_fp16, var_2178_cast_fp16, var_2180_cast_fp16, var_2182_cast_fp16))[name = tensor<string, []>("op_2200_cast_fp16")];
+            tensor<bool, []> var_2202_interleave_0 = const()[name = tensor<string, []>("op_2202_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2202_cast_fp16 = concat(axis = var_1717, interleave = var_2202_interleave_0, values = (var_2184_cast_fp16, var_2186_cast_fp16, var_2188_cast_fp16, var_2190_cast_fp16))[name = tensor<string, []>("op_2202_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_25_cast_fp16 = concat(axis = var_1728, interleave = input_25_interleave_0, values = (var_2192_cast_fp16, var_2194_cast_fp16, var_2196_cast_fp16, var_2198_cast_fp16, var_2200_cast_fp16, var_2202_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> obj_pad_type_0 = const()[name = tensor<string, []>("obj_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_strides_0 = const()[name = tensor<string, []>("obj_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_pad_0 = const()[name = tensor<string, []>("obj_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_dilations_0 = const()[name = tensor<string, []>("obj_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_groups_0 = const()[name = tensor<string, []>("obj_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13760256)))];
+            tensor<fp16, [384]> layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14055232)))];
+            tensor<fp16, [1, 384, 1, 1500]> obj_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = obj_dilations_0, groups = obj_groups_0, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = obj_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("obj_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> out_15_axes_0 = const()[name = tensor<string, []>("out_15_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_2221_to_fp16 = const()[name = tensor<string, []>("op_2221_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_2221_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("out_15_cast_fp16")];
+            tensor<fp16, [384]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14056064)))];
+            tensor<fp16, [384]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14056896)))];
+            tensor<fp16, []> input_27_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_27_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> layers_3_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14057728)))];
+            tensor<fp16, [1536]> layers_3_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15237440)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> hidden_states_pad_type_0 = const()[name = tensor<string, []>("hidden_states_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> hidden_states_strides_0 = const()[name = tensor<string, []>("hidden_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> hidden_states_pad_0 = const()[name = tensor<string, []>("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> hidden_states_dilations_0 = const()[name = tensor<string, []>("hidden_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> hidden_states_groups_0 = const()[name = tensor<string, []>("hidden_states_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> layers_3_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15240576)))];
+            tensor<fp16, [384]> layers_3_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16420288)))];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = layers_3_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> out_axes_0 = const()[name = tensor<string, []>("out_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_2259_to_fp16 = const()[name = tensor<string, []>("op_2259_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_2259_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
+            tensor<fp16, [384]> encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16421120)))];
+            tensor<fp16, [384]> encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16421952)))];
+            tensor<fp16, []> encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("encoder_output_embeds_type_fp32_cast_fp16")];
+        } -> (encoder_output_embeds);
+}
\ No newline at end of file
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlmodelc/model.mlmodel b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..1d7233cedc3da4c7344f4be5a83781a9603b04fe
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:030d64a3ddd296d6f709691a66a870aab7ee9f19e5fe07e8086245fb85302802
+size 54965
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4daffa592241d25c788329513821604bb92c4edb
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3706dac8d9d4bec269d3cee10fa4eda39b4240a46091c8323c1731a8c6d59c2
+size 16422784
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel b/openai_whisper-tiny.en/AudioEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..c1038337ecf65f6249a0b86c2e5e9549c8885b67
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d344dcf1192f28f38afc259079c9c0a2d26bf4c22e5066c35a7d05eed81f17c3
+size 257776
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin b/openai_whisper-tiny.en/AudioEncoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cda427752fb96c7a41597c2764a1cd21fe7e11a0
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3860a1f422710e98f060b7414c0c7034f4a1b6a819eec0530e2e57e30d891e72
+size 16422784
diff --git a/openai_whisper-tiny.en/AudioEncoder.mlpackage/Manifest.json b/openai_whisper-tiny.en/AudioEncoder.mlpackage/Manifest.json
new file mode 100644
index 0000000000000000000000000000000000000000..22de3edd731b51332ba2c4b62320f2d893ff7dc2
--- /dev/null
+++ b/openai_whisper-tiny.en/AudioEncoder.mlpackage/Manifest.json
@@ -0,0 +1,18 @@
+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "743BE91C-5205-432D-80DC-67CC4DB2D65A": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "96C95FCF-711E-4118-AD97-5B6E8A74B2BC": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "743BE91C-5205-432D-80DC-67CC4DB2D65A"
+}
diff --git a/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2e1726d7a7279ad316b3376cb887c6bd0655a1f9
--- /dev/null
+++ b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:160d9737169d22dc01a899e1c6a0a9c44d0637d41f0dedb2a0b7c1422c4035d2
+size 243
diff --git a/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1f160a0801459e1e12ed3c11bd674a5b019939cd
--- /dev/null
+++ b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb3b3f51b080f58b12a6888a5e8ad57419be9e4c6843b96a7577f171b300e660
+size 328
diff --git a/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..516efcd4cb4956ffa007fde604b3d600aecc028b
--- /dev/null
+++ b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Pad" : 1,
+      "Ios16.mul" : 2,
+      "SliceByIndex" : 1,
+      "Ios16.sub" : 1,
+      "Ios16.log" : 1,
+      "Ios16.conv" : 2,
+      "Ios16.add" : 3,
+      "Ios16.square" : 2,
+      "Ios16.matmul" : 1,
+      "Squeeze" : 2,
+      "Ios16.maximum" : 1,
+      "ExpandDims" : 4,
+      "Ios16.reduceMax" : 1,
+      "Identity" : 1,
+      "Ios16.reshape" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "audio",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "MelSpectrogram",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..ec8c32be0159fb2faeef6da3346717706dc89a0e
--- /dev/null
+++ b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/model.mil
@@ -0,0 +1,66 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios16>(tensor<fp16, [480000]> audio) {
+            tensor<int32, [3]> var_10 = const()[name = tensor<string, []>("op_10"), val = tensor<int32, [3]>([1, 1, 480000])];
+            tensor<fp16, [1, 1, 480000]> input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<int32, [6]> input_3_pad_0 = const()[name = tensor<string, []>("input_3_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 200, 200])];
+            tensor<string, []> input_3_mode_0 = const()[name = tensor<string, []>("input_3_mode_0"), val = tensor<string, []>("reflect")];
+            tensor<fp16, []> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
+            tensor<fp16, [1, 1, 480400]> input_3_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<int32, [1]> var_22 = const()[name = tensor<string, []>("op_22"), val = tensor<int32, [1]>([480400])];
+            tensor<fp16, [480400]> input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = tensor<string, []>("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 480400]> expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = tensor<string, []>("expand_dims_0_cast_fp16")];
+            tensor<int32, [1]> expand_dims_3 = const()[name = tensor<string, []>("expand_dims_3"), val = tensor<int32, [1]>([160])];
+            tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = tensor<string, []>("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 480400]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = tensor<string, []>("expand_dims_4_cast_fp16")];
+            tensor<string, []> conv_0_pad_type_0 = const()[name = tensor<string, []>("conv_0_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> conv_0_pad_0 = const()[name = tensor<string, []>("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_0_dilations_0 = const()[name = tensor<string, []>("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> conv_0_groups_0 = const()[name = tensor<string, []>("conv_0_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_1_to_fp16 = const()[name = tensor<string, []>("expand_dims_1_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1, 201, 3001]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_0_cast_fp16")];
+            tensor<string, []> conv_1_pad_type_0 = const()[name = tensor<string, []>("conv_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> conv_1_pad_0 = const()[name = tensor<string, []>("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_1_dilations_0 = const()[name = tensor<string, []>("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> conv_1_groups_0 = const()[name = tensor<string, []>("conv_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_2_to_fp16 = const()[name = tensor<string, []>("expand_dims_2_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160960)))];
+            tensor<fp16, [1, 201, 3001]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_1_cast_fp16")];
+            tensor<int32, [1]> squeeze_0_axes_0 = const()[name = tensor<string, []>("squeeze_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = tensor<string, []>("squeeze_0_cast_fp16")];
+            tensor<int32, [1]> squeeze_1_axes_0 = const()[name = tensor<string, []>("squeeze_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = tensor<string, []>("squeeze_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = tensor<string, []>("square_0_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = tensor<string, []>("square_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = tensor<string, []>("add_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = tensor<string, []>("magnitudes_1_cast_fp16")];
+            tensor<int32, [2]> magnitudes_begin_0 = const()[name = tensor<string, []>("magnitudes_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [2]> magnitudes_end_0 = const()[name = tensor<string, []>("magnitudes_end_0"), val = tensor<int32, [2]>([201, 3000])];
+            tensor<bool, [2]> magnitudes_end_mask_0 = const()[name = tensor<string, []>("magnitudes_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [201, 3000]> magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = tensor<string, []>("magnitudes_cast_fp16")];
+            tensor<bool, []> mel_spec_1_transpose_x_0 = const()[name = tensor<string, []>("mel_spec_1_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> mel_spec_1_transpose_y_0 = const()[name = tensor<string, []>("mel_spec_1_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [80, 201]> mel_filters_to_fp16 = const()[name = tensor<string, []>("mel_filters_to_fp16"), val = tensor<fp16, [80, 201]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(321856)))];
+            tensor<fp16, [80, 3000]> mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = tensor<string, []>("mel_spec_1_cast_fp16")];
+            tensor<fp16, []> var_41_to_fp16 = const()[name = tensor<string, []>("op_41_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [80, 3000]> mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = tensor<string, []>("mel_spec_cast_fp16")];
+            tensor<fp16, []> log_0_epsilon_0_to_fp16 = const()[name = tensor<string, []>("log_0_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
+            tensor<fp16, [80, 3000]> log_0_cast_fp16 = log(epsilon = log_0_epsilon_0_to_fp16, x = mel_spec_cast_fp16)[name = tensor<string, []>("log_0_cast_fp16")];
+            tensor<fp16, []> mul_0_y_0_to_fp16 = const()[name = tensor<string, []>("mul_0_y_0_to_fp16"), val = tensor<fp16, []>(0x1.bccp-2)];
+            tensor<fp16, [80, 3000]> mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = tensor<string, []>("mul_0_cast_fp16")];
+            tensor<bool, []> var_44_keep_dims_0 = const()[name = tensor<string, []>("op_44_keep_dims_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, []> var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
+            tensor<fp16, []> var_46_to_fp16 = const()[name = tensor<string, []>("op_46_to_fp16"), val = tensor<fp16, []>(0x1p+3)];
+            tensor<fp16, []> var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = tensor<string, []>("op_47_cast_fp16")];
+            tensor<fp16, [80, 3000]> log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = tensor<string, []>("log_spec_3_cast_fp16")];
+            tensor<fp16, []> var_50_to_fp16 = const()[name = tensor<string, []>("op_50_to_fp16"), val = tensor<fp16, []>(0x1p+2)];
+            tensor<fp16, [80, 3000]> var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = tensor<string, []>("op_51_cast_fp16")];
+            tensor<fp16, []> _inversed_log_spec_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_log_spec_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-2)];
+            tensor<fp16, [80, 3000]> _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = tensor<string, []>("_inversed_log_spec_cast_fp16")];
+            tensor<int32, [1]> var_55_axes_0 = const()[name = tensor<string, []>("op_55_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 80, 3000]> var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = tensor<string, []>("op_55_cast_fp16")];
+            tensor<int32, [1]> var_62_axes_0 = const()[name = tensor<string, []>("op_62_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
+        } -> (melspectrogram_features);
+}
\ No newline at end of file
diff --git a/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f7d28ffac464e9e7086a526930f0059187de8d01
--- /dev/null
+++ b/openai_whisper-tiny.en/MelSpectrogram.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:801024dbc7a89c677be1f8b285de3409e35f7d1786c9c8d9d0d6842ac57a1c83
+size 354080
diff --git a/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Data/com.apple.CoreML/model.mlmodel b/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Data/com.apple.CoreML/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..c8bbd876928118691892e82f4e5c77974581b426
--- /dev/null
+++ b/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Data/com.apple.CoreML/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e90f6d8c7ccaa25cecefaee45e4ac31c2fc9f8e0b7c0f69c4a01d8646add5d7
+size 8950
diff --git a/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Data/com.apple.CoreML/weights/weight.bin b/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Data/com.apple.CoreML/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..14e352b967781fc2795adb3a74fa4b2f0347259c
--- /dev/null
+++ b/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Data/com.apple.CoreML/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6419aa141b1b0f06ec2de0074a65cd7a5e2eb59fe93d43554ec857067d444891
+size 354080
diff --git a/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Manifest.json b/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Manifest.json
new file mode 100644
index 0000000000000000000000000000000000000000..236d577bdfd5b8d3a4e4631728b1b7b2874aed77
--- /dev/null
+++ b/openai_whisper-tiny.en/MelSpectrogram.mlpackage/Manifest.json
@@ -0,0 +1,18 @@
+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "2048FA7C-387C-4B5B-8A5A-3D0743C785BF": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "5E0F5547-1F38-4DA8-BB4F-FE149347BD45": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "2048FA7C-387C-4B5B-8A5A-3D0743C785BF"
+}
diff --git a/openai_whisper-tiny.en/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-tiny.en/TextDecoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..85cef1528a4882aabb4fa9afee0b0e77aea3c3ff
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edb99a30ccee8e157fbec80dc3dce49349ba0982391b327d753e10ccab0a01c3
+size 243
diff --git a/openai_whisper-tiny.en/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-tiny.en/TextDecoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bbd14e70330739bb0da096d4df06e15509c0e62f
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65c043a081845d190918b4c7d244f94a55df1a15fae796abedc1f414995542c6
+size 633
diff --git a/openai_whisper-tiny.en/TextDecoder.mlmodelc/metadata.json b/openai_whisper-tiny.en/TextDecoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..088dd60f18d80afc4d31446ec103c8bf5e487bc2
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlmodelc/metadata.json
@@ -0,0 +1,165 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 51864)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 51864]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536, 1, 1]",
+        "name" : "key_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536, 1, 1]",
+        "name" : "value_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500]",
+        "name" : "alignment_heads_weights",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Split" : 2,
+      "Concat" : 3,
+      "Squeeze" : 1,
+      "Ios16.mul" : 24,
+      "Ios16.layerNorm" : 13,
+      "SliceByIndex" : 16,
+      "Ios16.sub" : 1,
+      "Transpose" : 1,
+      "Ios16.conv" : 40,
+      "Ios16.add" : 25,
+      "Ios16.linear" : 1,
+      "Ios16.matmul" : 16,
+      "Ios16.gelu" : 4,
+      "Ios16.reduceMean" : 1,
+      "ExpandDims" : 6,
+      "Ios16.batchNorm" : 13,
+      "Ios16.gather" : 2,
+      "Ios16.reshape" : 32,
+      "Ios16.softmax" : 8
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.4.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "cache_length",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536 × 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536, 1, 448]",
+        "name" : "key_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536 × 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536, 1, 448]",
+        "name" : "value_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 448]",
+        "name" : "kv_cache_update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 384 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 384, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 448]",
+        "name" : "decoder_key_padding_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "TextDecoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-tiny.en/TextDecoder.mlmodelc/model.mil b/openai_whisper-tiny.en/TextDecoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..3dcee715f722e39d062e3f9e0de70135c5b95bbc
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlmodelc/model.mil
@@ -0,0 +1,718 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios16>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 448]> decoder_key_padding_mask, tensor<fp16, [1, 384, 1, 1500]> encoder_output_embeds, tensor<int32, [1]> input_ids, tensor<fp16, [1, 1536, 1, 448]> key_cache, tensor<fp16, [1, 448]> kv_cache_update_mask, tensor<fp16, [1, 1536, 1, 448]> value_cache) {
+            tensor<int32, []> var_24_axis_0 = const()[name = tensor<string, []>("op_24_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> var_24_batch_dims_0 = const()[name = tensor<string, []>("op_24_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [51864, 384]> embed_tokens_weight_to_fp16 = const()[name = tensor<string, []>("embed_tokens_weight_to_fp16"), val = tensor<fp16, [51864, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1, 384]> var_24_cast_fp16 = gather(axis = var_24_axis_0, batch_dims = var_24_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor<string, []>("op_24_cast_fp16")];
+            tensor<int32, []> var_28_axis_0 = const()[name = tensor<string, []>("op_28_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> var_28_batch_dims_0 = const()[name = tensor<string, []>("op_28_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [448, 384]> embed_positions_weight_to_fp16 = const()[name = tensor<string, []>("embed_positions_weight_to_fp16"), val = tensor<fp16, [448, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39831680)))];
+            tensor<fp16, [1, 384]> var_28_cast_fp16 = gather(axis = var_28_axis_0, batch_dims = var_28_batch_dims_0, indices = cache_length, x = embed_positions_weight_to_fp16)[name = tensor<string, []>("op_28_cast_fp16")];
+            tensor<fp16, [1, 384]> hidden_states_1_cast_fp16 = add(x = var_24_cast_fp16, y = var_28_cast_fp16)[name = tensor<string, []>("hidden_states_1_cast_fp16")];
+            tensor<int32, [1]> var_42_axes_0 = const()[name = tensor<string, []>("op_42_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 384, 1]> var_42_cast_fp16 = expand_dims(axes = var_42_axes_0, x = hidden_states_1_cast_fp16)[name = tensor<string, []>("op_42_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([3])];
+            tensor<fp16, [1, 384, 1, 1]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_42_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, [4]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [4]>([384, 384, 384, 384])];
+            tensor<int32, []> var_47_axis_0 = const()[name = tensor<string, []>("op_47_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 384, 1, 448]> var_47_cast_fp16_0, tensor<fp16, [1, 384, 1, 448]> var_47_cast_fp16_1, tensor<fp16, [1, 384, 1, 448]> var_47_cast_fp16_2, tensor<fp16, [1, 384, 1, 448]> var_47_cast_fp16_3 = split(axis = var_47_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor<string, []>("op_47_cast_fp16")];
+            tensor<int32, [4]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [4]>([384, 384, 384, 384])];
+            tensor<int32, []> var_54_axis_0 = const()[name = tensor<string, []>("op_54_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 384, 1, 448]> var_54_cast_fp16_0, tensor<fp16, [1, 384, 1, 448]> var_54_cast_fp16_1, tensor<fp16, [1, 384, 1, 448]> var_54_cast_fp16_2, tensor<fp16, [1, 384, 1, 448]> var_54_cast_fp16_3 = split(axis = var_54_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor<string, []>("op_54_cast_fp16")];
+            tensor<int32, []> var_64 = const()[name = tensor<string, []>("op_64"), val = tensor<int32, []>(3)];
+            tensor<int32, [1]> out_1_axes_0 = const()[name = tensor<string, []>("out_1_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_90_to_fp16 = const()[name = tensor<string, []>("op_90_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_90_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
+            tensor<fp16, [384]> obj_1_mean_0_to_fp16 = const()[name = tensor<string, []>("obj_1_mean_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40175808)))];
+            tensor<fp16, [384]> obj_1_variance_0_to_fp16 = const()[name = tensor<string, []>("obj_1_variance_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40176640)))];
+            tensor<fp16, [384]> obj_1_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40177472)))];
+            tensor<fp16, [384]> obj_1_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_1_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40178304)))];
+            tensor<fp16, []> obj_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor<string, []>("obj_1_cast_fp16")];
+            tensor<string, []> query_1_pad_type_0 = const()[name = tensor<string, []>("query_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_1_strides_0 = const()[name = tensor<string, []>("query_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_1_pad_0 = const()[name = tensor<string, []>("query_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_1_dilations_0 = const()[name = tensor<string, []>("query_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_1_groups_0 = const()[name = tensor<string, []>("query_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40179136)))];
+            tensor<fp16, [384]> layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40474112)))];
+            tensor<fp16, [1, 384, 1, 1]> query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("query_1_cast_fp16")];
+            tensor<string, []> current_key_1_pad_type_0 = const()[name = tensor<string, []>("current_key_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> current_key_1_strides_0 = const()[name = tensor<string, []>("current_key_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> current_key_1_pad_0 = const()[name = tensor<string, []>("current_key_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> current_key_1_dilations_0 = const()[name = tensor<string, []>("current_key_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> current_key_1_groups_0 = const()[name = tensor<string, []>("current_key_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40474944)))];
+            tensor<fp16, [1, 384, 1, 1]> current_key_1_cast_fp16 = conv(dilations = current_key_1_dilations_0, groups = current_key_1_groups_0, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = current_key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("current_key_1_cast_fp16")];
+            tensor<string, []> current_value_1_pad_type_0 = const()[name = tensor<string, []>("current_value_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> current_value_1_strides_0 = const()[name = tensor<string, []>("current_value_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> current_value_1_pad_0 = const()[name = tensor<string, []>("current_value_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> current_value_1_dilations_0 = const()[name = tensor<string, []>("current_value_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> current_value_1_groups_0 = const()[name = tensor<string, []>("current_value_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40769920)))];
+            tensor<fp16, [384]> layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41064896)))];
+            tensor<fp16, [1, 384, 1, 1]> current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_1_dilations_0, groups = current_value_1_groups_0, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = current_value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("current_value_1_cast_fp16")];
+            tensor<int32, [1]> var_125_axes_0 = const()[name = tensor<string, []>("op_125_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 448]> var_125_cast_fp16 = expand_dims(axes = var_125_axes_0, x = kv_cache_update_mask)[name = tensor<string, []>("op_125_cast_fp16")];
+            tensor<int32, [1]> var_126_axes_0 = const()[name = tensor<string, []>("op_126_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 448]> var_126_cast_fp16 = expand_dims(axes = var_126_axes_0, x = var_125_cast_fp16)[name = tensor<string, []>("op_126_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_128_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_128_cast_fp16")];
+            tensor<fp16, []> var_65_to_fp16 = const()[name = tensor<string, []>("op_65_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
+            tensor<fp16, [1, 1, 1, 448]> var_129_cast_fp16 = sub(x = var_65_to_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_129_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_130_cast_fp16 = mul(x = var_47_cast_fp16_0, y = var_129_cast_fp16)[name = tensor<string, []>("op_130_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> key_1_cast_fp16 = add(x = var_128_cast_fp16, y = var_130_cast_fp16)[name = tensor<string, []>("key_1_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_132_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_132_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_134_cast_fp16 = mul(x = var_54_cast_fp16_0, y = var_129_cast_fp16)[name = tensor<string, []>("op_134_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> value_1_cast_fp16 = add(x = var_132_cast_fp16, y = var_134_cast_fp16)[name = tensor<string, []>("value_1_cast_fp16")];
+            tensor<int32, [4]> var_137 = const()[name = tensor<string, []>("op_137"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> mh_q_1_cast_fp16 = reshape(shape = var_137, x = query_1_cast_fp16)[name = tensor<string, []>("mh_q_1_cast_fp16")];
+            tensor<fp16, []> var_139_to_fp16 = const()[name = tensor<string, []>("op_139_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_140_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_139_to_fp16)[name = tensor<string, []>("op_140_cast_fp16")];
+            tensor<int32, [4]> var_141 = const()[name = tensor<string, []>("op_141"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 448]> var_142_cast_fp16 = reshape(shape = var_141, x = key_1_cast_fp16)[name = tensor<string, []>("op_142_cast_fp16")];
+            tensor<bool, []> mh_w_1_transpose_x_0 = const()[name = tensor<string, []>("mh_w_1_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_1_transpose_y_0 = const()[name = tensor<string, []>("mh_w_1_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 448]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_140_cast_fp16, y = var_142_cast_fp16)[name = tensor<string, []>("mh_w_1_cast_fp16")];
+            tensor<int32, [1]> var_146_axes_0 = const()[name = tensor<string, []>("op_146_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 448]> var_146_cast_fp16 = expand_dims(axes = var_146_axes_0, x = decoder_key_padding_mask)[name = tensor<string, []>("op_146_cast_fp16")];
+            tensor<int32, [1]> var_147_axes_0 = const()[name = tensor<string, []>("op_147_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 448]> var_147_cast_fp16 = expand_dims(axes = var_147_axes_0, x = var_146_cast_fp16)[name = tensor<string, []>("op_147_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 448]> mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 448]> var_150_cast_fp16 = softmax(axis = var_64, x = mh_w_3_cast_fp16)[name = tensor<string, []>("op_150_cast_fp16")];
+            tensor<int32, [4]> var_151 = const()[name = tensor<string, []>("op_151"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 448]> var_152_cast_fp16 = reshape(shape = var_151, x = value_1_cast_fp16)[name = tensor<string, []>("op_152_cast_fp16")];
+            tensor<bool, []> attn_1_transpose_x_0 = const()[name = tensor<string, []>("attn_1_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_1_transpose_y_0 = const()[name = tensor<string, []>("attn_1_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_152_cast_fp16, y = var_150_cast_fp16)[name = tensor<string, []>("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_155 = const()[name = tensor<string, []>("op_155"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_1_cast_fp16 = reshape(shape = var_155, x = attn_1_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> obj_7_pad_type_0 = const()[name = tensor<string, []>("obj_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_7_strides_0 = const()[name = tensor<string, []>("obj_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_7_pad_0 = const()[name = tensor<string, []>("obj_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_7_dilations_0 = const()[name = tensor<string, []>("obj_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_7_groups_0 = const()[name = tensor<string, []>("obj_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41065728)))];
+            tensor<fp16, [384]> layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41360704)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_7_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = obj_7_dilations_0, groups = obj_7_groups_0, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = obj_7_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("obj_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> out_3_axes_0 = const()[name = tensor<string, []>("out_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_177_to_fp16 = const()[name = tensor<string, []>("op_177_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_177_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
+            tensor<fp16, [384]> obj_9_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41361536)))];
+            tensor<fp16, [384]> obj_9_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_9_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41362368)))];
+            tensor<fp16, []> obj_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor<string, []>("obj_9_cast_fp16")];
+            tensor<string, []> query_3_pad_type_0 = const()[name = tensor<string, []>("query_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_3_strides_0 = const()[name = tensor<string, []>("query_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_3_pad_0 = const()[name = tensor<string, []>("query_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_3_dilations_0 = const()[name = tensor<string, []>("query_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_3_groups_0 = const()[name = tensor<string, []>("query_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41363200)))];
+            tensor<fp16, [384]> layers_0_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41658176)))];
+            tensor<fp16, [1, 384, 1, 1]> query_3_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_bias_to_fp16, dilations = query_3_dilations_0, groups = query_3_groups_0, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = query_3_strides_0, weight = layers_0_encoder_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
+            tensor<string, []> key_3_pad_type_0 = const()[name = tensor<string, []>("key_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> key_3_strides_0 = const()[name = tensor<string, []>("key_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> key_3_pad_0 = const()[name = tensor<string, []>("key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> key_3_dilations_0 = const()[name = tensor<string, []>("key_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> key_3_groups_0 = const()[name = tensor<string, []>("key_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41659008)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_3_cast_fp16 = conv(dilations = key_3_dilations_0, groups = key_3_groups_0, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = key_3_strides_0, weight = layers_0_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_3_cast_fp16")];
+            tensor<string, []> value_3_pad_type_0 = const()[name = tensor<string, []>("value_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> value_3_strides_0 = const()[name = tensor<string, []>("value_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> value_3_pad_0 = const()[name = tensor<string, []>("value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> value_3_dilations_0 = const()[name = tensor<string, []>("value_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> value_3_groups_0 = const()[name = tensor<string, []>("value_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41953984)))];
+            tensor<fp16, [384]> layers_0_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42248960)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_3_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_bias_to_fp16, dilations = value_3_dilations_0, groups = value_3_groups_0, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = value_3_strides_0, weight = layers_0_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_3_cast_fp16")];
+            tensor<int32, [4]> var_212 = const()[name = tensor<string, []>("op_212"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> mh_q_3_cast_fp16 = reshape(shape = var_212, x = query_3_cast_fp16)[name = tensor<string, []>("mh_q_3_cast_fp16")];
+            tensor<fp16, []> var_214_to_fp16 = const()[name = tensor<string, []>("op_214_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_215_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_214_to_fp16)[name = tensor<string, []>("op_215_cast_fp16")];
+            tensor<int32, [4]> var_216 = const()[name = tensor<string, []>("op_216"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_217_cast_fp16 = reshape(shape = var_216, x = key_3_cast_fp16)[name = tensor<string, []>("op_217_cast_fp16")];
+            tensor<bool, []> mh_w_5_transpose_x_0 = const()[name = tensor<string, []>("mh_w_5_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_5_transpose_y_0 = const()[name = tensor<string, []>("mh_w_5_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_215_cast_fp16, y = var_217_cast_fp16)[name = tensor<string, []>("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 1500]> obj_13_cast_fp16 = softmax(axis = var_64, x = mh_w_5_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
+            tensor<int32, [4]> var_221 = const()[name = tensor<string, []>("op_221"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_222_cast_fp16 = reshape(shape = var_221, x = value_3_cast_fp16)[name = tensor<string, []>("op_222_cast_fp16")];
+            tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_222_cast_fp16, y = obj_13_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_225 = const()[name = tensor<string, []>("op_225"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_3_cast_fp16 = reshape(shape = var_225, x = attn_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> obj_11_pad_type_0 = const()[name = tensor<string, []>("obj_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_11_strides_0 = const()[name = tensor<string, []>("obj_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_11_pad_0 = const()[name = tensor<string, []>("obj_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_11_dilations_0 = const()[name = tensor<string, []>("obj_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_11_groups_0 = const()[name = tensor<string, []>("obj_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42249792)))];
+            tensor<fp16, [384]> layers_0_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42544768)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_11_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_0_encoder_attn_o_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("obj_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, [1]> out_5_axes_0 = const()[name = tensor<string, []>("out_5_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_243_to_fp16 = const()[name = tensor<string, []>("op_243_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_243_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
+            tensor<fp16, [384]> input_5_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_5_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42545600)))];
+            tensor<fp16, [384]> input_5_beta_0_to_fp16 = const()[name = tensor<string, []>("input_5_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42546432)))];
+            tensor<fp16, []> input_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> input_7_pad_type_0 = const()[name = tensor<string, []>("input_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_7_strides_0 = const()[name = tensor<string, []>("input_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_7_pad_0 = const()[name = tensor<string, []>("input_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_7_dilations_0 = const()[name = tensor<string, []>("input_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_7_groups_0 = const()[name = tensor<string, []>("input_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> layers_0_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42547264)))];
+            tensor<fp16, [1536]> layers_0_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43726976)))];
+            tensor<fp16, [1, 1536, 1, 1]> input_7_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = layers_0_fc1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_mode_0 = const()[name = tensor<string, []>("input_9_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1]> input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> hidden_states_3_pad_type_0 = const()[name = tensor<string, []>("hidden_states_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> hidden_states_3_strides_0 = const()[name = tensor<string, []>("hidden_states_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> hidden_states_3_pad_0 = const()[name = tensor<string, []>("hidden_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> hidden_states_3_dilations_0 = const()[name = tensor<string, []>("hidden_states_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> hidden_states_3_groups_0 = const()[name = tensor<string, []>("hidden_states_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> layers_0_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43730112)))];
+            tensor<fp16, [384]> layers_0_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44909824)))];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_3_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = layers_0_fc2_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, []> var_278 = const()[name = tensor<string, []>("op_278"), val = tensor<int32, []>(3)];
+            tensor<int32, [1]> out_7_axes_0 = const()[name = tensor<string, []>("out_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_304_to_fp16 = const()[name = tensor<string, []>("op_304_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_304_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
+            tensor<fp16, [384]> obj_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_15_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44910656)))];
+            tensor<fp16, [384]> obj_15_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_15_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44911488)))];
+            tensor<fp16, []> obj_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("obj_15_cast_fp16")];
+            tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_5_strides_0 = const()[name = tensor<string, []>("query_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_5_dilations_0 = const()[name = tensor<string, []>("query_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_5_groups_0 = const()[name = tensor<string, []>("query_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44912320)))];
+            tensor<fp16, [384]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45207296)))];
+            tensor<fp16, [1, 384, 1, 1]> query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = query_5_dilations_0, groups = query_5_groups_0, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = query_5_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
+            tensor<string, []> current_key_3_pad_type_0 = const()[name = tensor<string, []>("current_key_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> current_key_3_strides_0 = const()[name = tensor<string, []>("current_key_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> current_key_3_pad_0 = const()[name = tensor<string, []>("current_key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> current_key_3_dilations_0 = const()[name = tensor<string, []>("current_key_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> current_key_3_groups_0 = const()[name = tensor<string, []>("current_key_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45208128)))];
+            tensor<fp16, [1, 384, 1, 1]> current_key_3_cast_fp16 = conv(dilations = current_key_3_dilations_0, groups = current_key_3_groups_0, pad = current_key_3_pad_0, pad_type = current_key_3_pad_type_0, strides = current_key_3_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_key_3_cast_fp16")];
+            tensor<string, []> current_value_3_pad_type_0 = const()[name = tensor<string, []>("current_value_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> current_value_3_strides_0 = const()[name = tensor<string, []>("current_value_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> current_value_3_pad_0 = const()[name = tensor<string, []>("current_value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> current_value_3_dilations_0 = const()[name = tensor<string, []>("current_value_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> current_value_3_groups_0 = const()[name = tensor<string, []>("current_value_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45503104)))];
+            tensor<fp16, [384]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45798080)))];
+            tensor<fp16, [1, 384, 1, 1]> current_value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = current_value_3_dilations_0, groups = current_value_3_groups_0, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = current_value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_value_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_342_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_342_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_344_cast_fp16 = mul(x = var_47_cast_fp16_1, y = var_129_cast_fp16)[name = tensor<string, []>("op_344_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> key_5_cast_fp16 = add(x = var_342_cast_fp16, y = var_344_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_346_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_346_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_348_cast_fp16 = mul(x = var_54_cast_fp16_1, y = var_129_cast_fp16)[name = tensor<string, []>("op_348_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> value_5_cast_fp16 = add(x = var_346_cast_fp16, y = var_348_cast_fp16)[name = tensor<string, []>("value_5_cast_fp16")];
+            tensor<int32, [4]> var_351 = const()[name = tensor<string, []>("op_351"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> mh_q_5_cast_fp16 = reshape(shape = var_351, x = query_5_cast_fp16)[name = tensor<string, []>("mh_q_5_cast_fp16")];
+            tensor<fp16, []> var_353_to_fp16 = const()[name = tensor<string, []>("op_353_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_354_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_353_to_fp16)[name = tensor<string, []>("op_354_cast_fp16")];
+            tensor<int32, [4]> var_355 = const()[name = tensor<string, []>("op_355"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 448]> var_356_cast_fp16 = reshape(shape = var_355, x = key_5_cast_fp16)[name = tensor<string, []>("op_356_cast_fp16")];
+            tensor<bool, []> mh_w_7_transpose_x_0 = const()[name = tensor<string, []>("mh_w_7_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_7_transpose_y_0 = const()[name = tensor<string, []>("mh_w_7_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 448]> mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_354_cast_fp16, y = var_356_cast_fp16)[name = tensor<string, []>("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 448]> mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 448]> var_364_cast_fp16 = softmax(axis = var_278, x = mh_w_9_cast_fp16)[name = tensor<string, []>("op_364_cast_fp16")];
+            tensor<int32, [4]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 448]> var_366_cast_fp16 = reshape(shape = var_365, x = value_5_cast_fp16)[name = tensor<string, []>("op_366_cast_fp16")];
+            tensor<bool, []> attn_5_transpose_x_0 = const()[name = tensor<string, []>("attn_5_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_5_transpose_y_0 = const()[name = tensor<string, []>("attn_5_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_366_cast_fp16, y = var_364_cast_fp16)[name = tensor<string, []>("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_369 = const()[name = tensor<string, []>("op_369"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_11_cast_fp16 = reshape(shape = var_369, x = attn_5_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> obj_21_pad_type_0 = const()[name = tensor<string, []>("obj_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_21_strides_0 = const()[name = tensor<string, []>("obj_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_21_pad_0 = const()[name = tensor<string, []>("obj_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_21_dilations_0 = const()[name = tensor<string, []>("obj_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_21_groups_0 = const()[name = tensor<string, []>("obj_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45798912)))];
+            tensor<fp16, [384]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46093888)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_21_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = obj_21_dilations_0, groups = obj_21_groups_0, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = obj_21_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, [1]> out_9_axes_0 = const()[name = tensor<string, []>("out_9_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_391_to_fp16 = const()[name = tensor<string, []>("op_391_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_391_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
+            tensor<fp16, [384]> obj_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_23_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46094720)))];
+            tensor<fp16, [384]> obj_23_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_23_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46095552)))];
+            tensor<fp16, []> obj_23_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_23_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
+            tensor<string, []> query_7_pad_type_0 = const()[name = tensor<string, []>("query_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_7_strides_0 = const()[name = tensor<string, []>("query_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_7_pad_0 = const()[name = tensor<string, []>("query_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_7_dilations_0 = const()[name = tensor<string, []>("query_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_7_groups_0 = const()[name = tensor<string, []>("query_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46096384)))];
+            tensor<fp16, [384]> layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46391360)))];
+            tensor<fp16, [1, 384, 1, 1]> query_7_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
+            tensor<string, []> key_7_pad_type_0 = const()[name = tensor<string, []>("key_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> key_7_strides_0 = const()[name = tensor<string, []>("key_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> key_7_pad_0 = const()[name = tensor<string, []>("key_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> key_7_dilations_0 = const()[name = tensor<string, []>("key_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> key_7_groups_0 = const()[name = tensor<string, []>("key_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46392192)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_7_cast_fp16 = conv(dilations = key_7_dilations_0, groups = key_7_groups_0, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = key_7_strides_0, weight = layers_1_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_7_cast_fp16")];
+            tensor<string, []> value_7_pad_type_0 = const()[name = tensor<string, []>("value_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> value_7_strides_0 = const()[name = tensor<string, []>("value_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> value_7_pad_0 = const()[name = tensor<string, []>("value_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> value_7_dilations_0 = const()[name = tensor<string, []>("value_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> value_7_groups_0 = const()[name = tensor<string, []>("value_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46687168)))];
+            tensor<fp16, [384]> layers_1_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46982144)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_7_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_bias_to_fp16, dilations = value_7_dilations_0, groups = value_7_groups_0, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = value_7_strides_0, weight = layers_1_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_7_cast_fp16")];
+            tensor<int32, [4]> var_426 = const()[name = tensor<string, []>("op_426"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> mh_q_7_cast_fp16 = reshape(shape = var_426, x = query_7_cast_fp16)[name = tensor<string, []>("mh_q_7_cast_fp16")];
+            tensor<fp16, []> var_428_to_fp16 = const()[name = tensor<string, []>("op_428_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_429_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_428_to_fp16)[name = tensor<string, []>("op_429_cast_fp16")];
+            tensor<int32, [4]> var_430 = const()[name = tensor<string, []>("op_430"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_431_cast_fp16 = reshape(shape = var_430, x = key_7_cast_fp16)[name = tensor<string, []>("op_431_cast_fp16")];
+            tensor<bool, []> mh_w_11_transpose_x_0 = const()[name = tensor<string, []>("mh_w_11_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_11_transpose_y_0 = const()[name = tensor<string, []>("mh_w_11_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 1500]> mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_429_cast_fp16, y = var_431_cast_fp16)[name = tensor<string, []>("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 1500]> obj_27_cast_fp16 = softmax(axis = var_278, x = mh_w_11_cast_fp16)[name = tensor<string, []>("obj_27_cast_fp16")];
+            tensor<int32, [4]> var_435 = const()[name = tensor<string, []>("op_435"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_436_cast_fp16 = reshape(shape = var_435, x = value_7_cast_fp16)[name = tensor<string, []>("op_436_cast_fp16")];
+            tensor<bool, []> attn_7_transpose_x_0 = const()[name = tensor<string, []>("attn_7_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_7_transpose_y_0 = const()[name = tensor<string, []>("attn_7_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_436_cast_fp16, y = obj_27_cast_fp16)[name = tensor<string, []>("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_439 = const()[name = tensor<string, []>("op_439"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_13_cast_fp16 = reshape(shape = var_439, x = attn_7_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> obj_25_pad_type_0 = const()[name = tensor<string, []>("obj_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_25_strides_0 = const()[name = tensor<string, []>("obj_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_25_pad_0 = const()[name = tensor<string, []>("obj_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_25_dilations_0 = const()[name = tensor<string, []>("obj_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_25_groups_0 = const()[name = tensor<string, []>("obj_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46982976)))];
+            tensor<fp16, [384]> layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47277952)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_25_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = obj_25_dilations_0, groups = obj_25_groups_0, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = obj_25_strides_0, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("obj_25_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> out_11_axes_0 = const()[name = tensor<string, []>("out_11_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_460_to_fp16 = const()[name = tensor<string, []>("op_460_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_460_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
+            tensor<fp16, [384]> input_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_15_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47278784)))];
+            tensor<fp16, [384]> input_15_beta_0_to_fp16 = const()[name = tensor<string, []>("input_15_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47279616)))];
+            tensor<fp16, []> input_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_17_strides_0 = const()[name = tensor<string, []>("input_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_17_dilations_0 = const()[name = tensor<string, []>("input_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_17_groups_0 = const()[name = tensor<string, []>("input_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47280448)))];
+            tensor<fp16, [1536]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48460160)))];
+            tensor<fp16, [1, 1536, 1, 1]> input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_mode_0 = const()[name = tensor<string, []>("input_19_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> hidden_states_5_strides_0 = const()[name = tensor<string, []>("hidden_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> hidden_states_5_dilations_0 = const()[name = tensor<string, []>("hidden_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> hidden_states_5_groups_0 = const()[name = tensor<string, []>("hidden_states_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48463296)))];
+            tensor<fp16, [384]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49643008)))];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = layers_1_fc2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_496 = const()[name = tensor<string, []>("op_496"), val = tensor<int32, []>(3)];
+            tensor<int32, [1]> out_13_axes_0 = const()[name = tensor<string, []>("out_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_522_to_fp16 = const()[name = tensor<string, []>("op_522_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_522_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("out_13_cast_fp16")];
+            tensor<fp16, [384]> obj_29_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_29_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49643840)))];
+            tensor<fp16, [384]> obj_29_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_29_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49644672)))];
+            tensor<fp16, []> obj_29_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_29_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor<string, []>("obj_29_cast_fp16")];
+            tensor<string, []> query_9_pad_type_0 = const()[name = tensor<string, []>("query_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_9_strides_0 = const()[name = tensor<string, []>("query_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_9_pad_0 = const()[name = tensor<string, []>("query_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_9_dilations_0 = const()[name = tensor<string, []>("query_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_9_groups_0 = const()[name = tensor<string, []>("query_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49645504)))];
+            tensor<fp16, [384]> layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49940480)))];
+            tensor<fp16, [1, 384, 1, 1]> query_9_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = query_9_dilations_0, groups = query_9_groups_0, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = query_9_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
+            tensor<string, []> current_key_5_pad_type_0 = const()[name = tensor<string, []>("current_key_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> current_key_5_strides_0 = const()[name = tensor<string, []>("current_key_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> current_key_5_pad_0 = const()[name = tensor<string, []>("current_key_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> current_key_5_dilations_0 = const()[name = tensor<string, []>("current_key_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> current_key_5_groups_0 = const()[name = tensor<string, []>("current_key_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49941312)))];
+            tensor<fp16, [1, 384, 1, 1]> current_key_5_cast_fp16 = conv(dilations = current_key_5_dilations_0, groups = current_key_5_groups_0, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = current_key_5_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("current_key_5_cast_fp16")];
+            tensor<string, []> current_value_5_pad_type_0 = const()[name = tensor<string, []>("current_value_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> current_value_5_strides_0 = const()[name = tensor<string, []>("current_value_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> current_value_5_pad_0 = const()[name = tensor<string, []>("current_value_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> current_value_5_dilations_0 = const()[name = tensor<string, []>("current_value_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> current_value_5_groups_0 = const()[name = tensor<string, []>("current_value_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50236288)))];
+            tensor<fp16, [384]> layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50531264)))];
+            tensor<fp16, [1, 384, 1, 1]> current_value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = current_value_5_dilations_0, groups = current_value_5_groups_0, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = current_value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("current_value_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_560_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_560_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_562_cast_fp16 = mul(x = var_47_cast_fp16_2, y = var_129_cast_fp16)[name = tensor<string, []>("op_562_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> key_9_cast_fp16 = add(x = var_560_cast_fp16, y = var_562_cast_fp16)[name = tensor<string, []>("key_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_564_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_564_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_566_cast_fp16 = mul(x = var_54_cast_fp16_2, y = var_129_cast_fp16)[name = tensor<string, []>("op_566_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> value_9_cast_fp16 = add(x = var_564_cast_fp16, y = var_566_cast_fp16)[name = tensor<string, []>("value_9_cast_fp16")];
+            tensor<int32, [4]> var_569 = const()[name = tensor<string, []>("op_569"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> mh_q_9_cast_fp16 = reshape(shape = var_569, x = query_9_cast_fp16)[name = tensor<string, []>("mh_q_9_cast_fp16")];
+            tensor<fp16, []> var_571_to_fp16 = const()[name = tensor<string, []>("op_571_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_572_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_571_to_fp16)[name = tensor<string, []>("op_572_cast_fp16")];
+            tensor<int32, [4]> var_573 = const()[name = tensor<string, []>("op_573"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 448]> var_574_cast_fp16 = reshape(shape = var_573, x = key_9_cast_fp16)[name = tensor<string, []>("op_574_cast_fp16")];
+            tensor<bool, []> mh_w_13_transpose_x_0 = const()[name = tensor<string, []>("mh_w_13_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_13_transpose_y_0 = const()[name = tensor<string, []>("mh_w_13_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 448]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_572_cast_fp16, y = var_574_cast_fp16)[name = tensor<string, []>("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 448]> mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 448]> var_582_cast_fp16 = softmax(axis = var_496, x = mh_w_15_cast_fp16)[name = tensor<string, []>("op_582_cast_fp16")];
+            tensor<int32, [4]> var_583 = const()[name = tensor<string, []>("op_583"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 448]> var_584_cast_fp16 = reshape(shape = var_583, x = value_9_cast_fp16)[name = tensor<string, []>("op_584_cast_fp16")];
+            tensor<bool, []> attn_9_transpose_x_0 = const()[name = tensor<string, []>("attn_9_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_9_transpose_y_0 = const()[name = tensor<string, []>("attn_9_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_584_cast_fp16, y = var_582_cast_fp16)[name = tensor<string, []>("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_587 = const()[name = tensor<string, []>("op_587"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_21_cast_fp16 = reshape(shape = var_587, x = attn_9_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> obj_35_pad_type_0 = const()[name = tensor<string, []>("obj_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_35_strides_0 = const()[name = tensor<string, []>("obj_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_35_pad_0 = const()[name = tensor<string, []>("obj_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_35_dilations_0 = const()[name = tensor<string, []>("obj_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_35_groups_0 = const()[name = tensor<string, []>("obj_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50532096)))];
+            tensor<fp16, [384]> layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50827072)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_35_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = obj_35_dilations_0, groups = obj_35_groups_0, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = obj_35_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("obj_35_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> out_15_axes_0 = const()[name = tensor<string, []>("out_15_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_609_to_fp16 = const()[name = tensor<string, []>("op_609_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_609_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("out_15_cast_fp16")];
+            tensor<fp16, [384]> obj_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_37_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50827904)))];
+            tensor<fp16, [384]> obj_37_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_37_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50828736)))];
+            tensor<fp16, []> obj_37_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_37_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor<string, []>("obj_37_cast_fp16")];
+            tensor<string, []> query_11_pad_type_0 = const()[name = tensor<string, []>("query_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_11_strides_0 = const()[name = tensor<string, []>("query_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_11_pad_0 = const()[name = tensor<string, []>("query_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_11_dilations_0 = const()[name = tensor<string, []>("query_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_11_groups_0 = const()[name = tensor<string, []>("query_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50829568)))];
+            tensor<fp16, [384]> layers_2_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51124544)))];
+            tensor<fp16, [1, 384, 1, 1]> query_11_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_bias_to_fp16, dilations = query_11_dilations_0, groups = query_11_groups_0, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = query_11_strides_0, weight = layers_2_encoder_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor<string, []>("query_11_cast_fp16")];
+            tensor<string, []> key_11_pad_type_0 = const()[name = tensor<string, []>("key_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> key_11_strides_0 = const()[name = tensor<string, []>("key_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> key_11_pad_0 = const()[name = tensor<string, []>("key_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> key_11_dilations_0 = const()[name = tensor<string, []>("key_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> key_11_groups_0 = const()[name = tensor<string, []>("key_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51125376)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_11_cast_fp16 = conv(dilations = key_11_dilations_0, groups = key_11_groups_0, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = key_11_strides_0, weight = layers_2_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_11_cast_fp16")];
+            tensor<string, []> value_11_pad_type_0 = const()[name = tensor<string, []>("value_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> value_11_strides_0 = const()[name = tensor<string, []>("value_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> value_11_pad_0 = const()[name = tensor<string, []>("value_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> value_11_dilations_0 = const()[name = tensor<string, []>("value_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> value_11_groups_0 = const()[name = tensor<string, []>("value_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51420352)))];
+            tensor<fp16, [384]> layers_2_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51715328)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_11_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_bias_to_fp16, dilations = value_11_dilations_0, groups = value_11_groups_0, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = value_11_strides_0, weight = layers_2_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_11_cast_fp16")];
+            tensor<int32, [4]> var_644 = const()[name = tensor<string, []>("op_644"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> mh_q_11_cast_fp16 = reshape(shape = var_644, x = query_11_cast_fp16)[name = tensor<string, []>("mh_q_11_cast_fp16")];
+            tensor<fp16, []> var_646_to_fp16 = const()[name = tensor<string, []>("op_646_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_647_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_646_to_fp16)[name = tensor<string, []>("op_647_cast_fp16")];
+            tensor<int32, [4]> var_648 = const()[name = tensor<string, []>("op_648"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_649_cast_fp16 = reshape(shape = var_648, x = key_11_cast_fp16)[name = tensor<string, []>("op_649_cast_fp16")];
+            tensor<bool, []> mh_w_17_transpose_x_0 = const()[name = tensor<string, []>("mh_w_17_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_17_transpose_y_0 = const()[name = tensor<string, []>("mh_w_17_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 1500]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_647_cast_fp16, y = var_649_cast_fp16)[name = tensor<string, []>("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 1500]> obj_41_cast_fp16 = softmax(axis = var_496, x = mh_w_17_cast_fp16)[name = tensor<string, []>("obj_41_cast_fp16")];
+            tensor<int32, [4]> var_653 = const()[name = tensor<string, []>("op_653"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_654_cast_fp16 = reshape(shape = var_653, x = value_11_cast_fp16)[name = tensor<string, []>("op_654_cast_fp16")];
+            tensor<bool, []> attn_11_transpose_x_0 = const()[name = tensor<string, []>("attn_11_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_11_transpose_y_0 = const()[name = tensor<string, []>("attn_11_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_654_cast_fp16, y = obj_41_cast_fp16)[name = tensor<string, []>("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_657 = const()[name = tensor<string, []>("op_657"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_23_cast_fp16 = reshape(shape = var_657, x = attn_11_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> obj_39_pad_type_0 = const()[name = tensor<string, []>("obj_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_39_strides_0 = const()[name = tensor<string, []>("obj_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_39_pad_0 = const()[name = tensor<string, []>("obj_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_39_dilations_0 = const()[name = tensor<string, []>("obj_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_39_groups_0 = const()[name = tensor<string, []>("obj_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51716160)))];
+            tensor<fp16, [384]> layers_2_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52011136)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_39_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_bias_to_fp16, dilations = obj_39_dilations_0, groups = obj_39_groups_0, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = obj_39_strides_0, weight = layers_2_encoder_attn_o_proj_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("obj_39_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, [1]> out_17_axes_0 = const()[name = tensor<string, []>("out_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_678_to_fp16 = const()[name = tensor<string, []>("op_678_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_678_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("out_17_cast_fp16")];
+            tensor<fp16, [384]> input_25_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_25_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52011968)))];
+            tensor<fp16, [384]> input_25_beta_0_to_fp16 = const()[name = tensor<string, []>("input_25_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52012800)))];
+            tensor<fp16, []> input_25_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_25_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> input_27_pad_type_0 = const()[name = tensor<string, []>("input_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_27_strides_0 = const()[name = tensor<string, []>("input_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_27_pad_0 = const()[name = tensor<string, []>("input_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_27_dilations_0 = const()[name = tensor<string, []>("input_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_27_groups_0 = const()[name = tensor<string, []>("input_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> layers_2_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52013632)))];
+            tensor<fp16, [1536]> layers_2_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53193344)))];
+            tensor<fp16, [1, 1536, 1, 1]> input_27_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = layers_2_fc1_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_mode_0 = const()[name = tensor<string, []>("input_29_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> hidden_states_7_pad_type_0 = const()[name = tensor<string, []>("hidden_states_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = tensor<string, []>("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = tensor<string, []>("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = tensor<string, []>("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> hidden_states_7_groups_0 = const()[name = tensor<string, []>("hidden_states_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> layers_2_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53196480)))];
+            tensor<fp16, [384]> layers_2_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54376192)))];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_7_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = layers_2_fc2_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, []> var_714 = const()[name = tensor<string, []>("op_714"), val = tensor<int32, []>(3)];
+            tensor<int32, [1]> out_19_axes_0 = const()[name = tensor<string, []>("out_19_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_740_to_fp16 = const()[name = tensor<string, []>("op_740_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_740_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("out_19_cast_fp16")];
+            tensor<fp16, [384]> obj_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_43_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54377024)))];
+            tensor<fp16, [384]> obj_43_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_43_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54377856)))];
+            tensor<fp16, []> obj_43_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_43_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor<string, []>("obj_43_cast_fp16")];
+            tensor<string, []> query_13_pad_type_0 = const()[name = tensor<string, []>("query_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_13_strides_0 = const()[name = tensor<string, []>("query_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_13_pad_0 = const()[name = tensor<string, []>("query_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_13_dilations_0 = const()[name = tensor<string, []>("query_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_13_groups_0 = const()[name = tensor<string, []>("query_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54378688)))];
+            tensor<fp16, [384]> layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54673664)))];
+            tensor<fp16, [1, 384, 1, 1]> query_13_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = query_13_dilations_0, groups = query_13_groups_0, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = query_13_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("query_13_cast_fp16")];
+            tensor<string, []> current_key_pad_type_0 = const()[name = tensor<string, []>("current_key_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> current_key_strides_0 = const()[name = tensor<string, []>("current_key_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> current_key_pad_0 = const()[name = tensor<string, []>("current_key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> current_key_dilations_0 = const()[name = tensor<string, []>("current_key_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> current_key_groups_0 = const()[name = tensor<string, []>("current_key_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54674496)))];
+            tensor<fp16, [1, 384, 1, 1]> current_key_cast_fp16 = conv(dilations = current_key_dilations_0, groups = current_key_groups_0, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = current_key_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("current_key_cast_fp16")];
+            tensor<string, []> current_value_pad_type_0 = const()[name = tensor<string, []>("current_value_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> current_value_strides_0 = const()[name = tensor<string, []>("current_value_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> current_value_pad_0 = const()[name = tensor<string, []>("current_value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> current_value_dilations_0 = const()[name = tensor<string, []>("current_value_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> current_value_groups_0 = const()[name = tensor<string, []>("current_value_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54969472)))];
+            tensor<fp16, [384]> layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55264448)))];
+            tensor<fp16, [1, 384, 1, 1]> current_value_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = current_value_dilations_0, groups = current_value_groups_0, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = current_value_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("current_value_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_778_cast_fp16 = mul(x = current_key_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_778_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_780_cast_fp16 = mul(x = var_47_cast_fp16_3, y = var_129_cast_fp16)[name = tensor<string, []>("op_780_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> key_13_cast_fp16 = add(x = var_778_cast_fp16, y = var_780_cast_fp16)[name = tensor<string, []>("key_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_782_cast_fp16 = mul(x = current_value_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_782_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> var_784_cast_fp16 = mul(x = var_54_cast_fp16_3, y = var_129_cast_fp16)[name = tensor<string, []>("op_784_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 448]> value_13_cast_fp16 = add(x = var_782_cast_fp16, y = var_784_cast_fp16)[name = tensor<string, []>("value_13_cast_fp16")];
+            tensor<int32, [4]> var_787 = const()[name = tensor<string, []>("op_787"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> mh_q_13_cast_fp16 = reshape(shape = var_787, x = query_13_cast_fp16)[name = tensor<string, []>("mh_q_13_cast_fp16")];
+            tensor<fp16, []> var_789_to_fp16 = const()[name = tensor<string, []>("op_789_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_790_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_789_to_fp16)[name = tensor<string, []>("op_790_cast_fp16")];
+            tensor<int32, [4]> var_791 = const()[name = tensor<string, []>("op_791"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 448]> var_792_cast_fp16 = reshape(shape = var_791, x = key_13_cast_fp16)[name = tensor<string, []>("op_792_cast_fp16")];
+            tensor<bool, []> mh_w_19_transpose_x_0 = const()[name = tensor<string, []>("mh_w_19_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_19_transpose_y_0 = const()[name = tensor<string, []>("mh_w_19_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 448]> mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_790_cast_fp16, y = var_792_cast_fp16)[name = tensor<string, []>("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 448]> mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 448]> var_800_cast_fp16 = softmax(axis = var_714, x = mh_w_21_cast_fp16)[name = tensor<string, []>("op_800_cast_fp16")];
+            tensor<int32, [4]> var_801 = const()[name = tensor<string, []>("op_801"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 448]> var_802_cast_fp16 = reshape(shape = var_801, x = value_13_cast_fp16)[name = tensor<string, []>("op_802_cast_fp16")];
+            tensor<bool, []> attn_13_transpose_x_0 = const()[name = tensor<string, []>("attn_13_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_13_transpose_y_0 = const()[name = tensor<string, []>("attn_13_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_802_cast_fp16, y = var_800_cast_fp16)[name = tensor<string, []>("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_805 = const()[name = tensor<string, []>("op_805"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_31_cast_fp16 = reshape(shape = var_805, x = attn_13_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> obj_49_pad_type_0 = const()[name = tensor<string, []>("obj_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_49_strides_0 = const()[name = tensor<string, []>("obj_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_49_pad_0 = const()[name = tensor<string, []>("obj_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_49_dilations_0 = const()[name = tensor<string, []>("obj_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_49_groups_0 = const()[name = tensor<string, []>("obj_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55265280)))];
+            tensor<fp16, [384]> layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55560256)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_49_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = obj_49_dilations_0, groups = obj_49_groups_0, pad = obj_49_pad_0, pad_type = obj_49_pad_type_0, strides = obj_49_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("obj_49_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, [1]> out_21_axes_0 = const()[name = tensor<string, []>("out_21_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_827_to_fp16 = const()[name = tensor<string, []>("op_827_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_827_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("out_21_cast_fp16")];
+            tensor<fp16, [384]> obj_51_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_51_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55561088)))];
+            tensor<fp16, [384]> obj_51_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_51_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55561920)))];
+            tensor<fp16, []> obj_51_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_51_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor<string, []>("obj_51_cast_fp16")];
+            tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> query_strides_0 = const()[name = tensor<string, []>("query_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> query_dilations_0 = const()[name = tensor<string, []>("query_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> query_groups_0 = const()[name = tensor<string, []>("query_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55562752)))];
+            tensor<fp16, [384]> layers_3_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55857728)))];
+            tensor<fp16, [1, 384, 1, 1]> query_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_bias_to_fp16, dilations = query_dilations_0, groups = query_groups_0, pad = query_pad_0, pad_type = query_pad_type_0, strides = query_strides_0, weight = layers_3_encoder_attn_q_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
+            tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> key_strides_0 = const()[name = tensor<string, []>("key_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> key_pad_0 = const()[name = tensor<string, []>("key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> key_dilations_0 = const()[name = tensor<string, []>("key_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> key_groups_0 = const()[name = tensor<string, []>("key_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55858560)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_cast_fp16 = conv(dilations = key_dilations_0, groups = key_groups_0, pad = key_pad_0, pad_type = key_pad_type_0, strides = key_strides_0, weight = layers_3_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_cast_fp16")];
+            tensor<string, []> value_pad_type_0 = const()[name = tensor<string, []>("value_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> value_strides_0 = const()[name = tensor<string, []>("value_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> value_pad_0 = const()[name = tensor<string, []>("value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> value_dilations_0 = const()[name = tensor<string, []>("value_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> value_groups_0 = const()[name = tensor<string, []>("value_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56153536)))];
+            tensor<fp16, [384]> layers_3_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56448512)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_bias_to_fp16, dilations = value_dilations_0, groups = value_groups_0, pad = value_pad_0, pad_type = value_pad_type_0, strides = value_strides_0, weight = layers_3_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_cast_fp16")];
+            tensor<int32, [4]> var_862 = const()[name = tensor<string, []>("op_862"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> mh_q_cast_fp16 = reshape(shape = var_862, x = query_cast_fp16)[name = tensor<string, []>("mh_q_cast_fp16")];
+            tensor<fp16, []> var_864_to_fp16 = const()[name = tensor<string, []>("op_864_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_865_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_864_to_fp16)[name = tensor<string, []>("op_865_cast_fp16")];
+            tensor<int32, [4]> var_866 = const()[name = tensor<string, []>("op_866"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_867_cast_fp16 = reshape(shape = var_866, x = key_cast_fp16)[name = tensor<string, []>("op_867_cast_fp16")];
+            tensor<bool, []> mh_w_transpose_x_0 = const()[name = tensor<string, []>("mh_w_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_transpose_y_0 = const()[name = tensor<string, []>("mh_w_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_865_cast_fp16, y = var_867_cast_fp16)[name = tensor<string, []>("mh_w_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 1500]> obj_55_cast_fp16 = softmax(axis = var_714, x = mh_w_cast_fp16)[name = tensor<string, []>("obj_55_cast_fp16")];
+            tensor<int32, [4]> var_871 = const()[name = tensor<string, []>("op_871"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_872_cast_fp16 = reshape(shape = var_871, x = value_cast_fp16)[name = tensor<string, []>("op_872_cast_fp16")];
+            tensor<bool, []> attn_transpose_x_0 = const()[name = tensor<string, []>("attn_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_transpose_y_0 = const()[name = tensor<string, []>("attn_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_872_cast_fp16, y = obj_55_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
+            tensor<int32, [4]> var_875 = const()[name = tensor<string, []>("op_875"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_33_cast_fp16 = reshape(shape = var_875, x = attn_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> obj_53_pad_type_0 = const()[name = tensor<string, []>("obj_53_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> obj_53_strides_0 = const()[name = tensor<string, []>("obj_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> obj_53_pad_0 = const()[name = tensor<string, []>("obj_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> obj_53_dilations_0 = const()[name = tensor<string, []>("obj_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> obj_53_groups_0 = const()[name = tensor<string, []>("obj_53_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56449344)))];
+            tensor<fp16, [384]> layers_3_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56744320)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_53_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_bias_to_fp16, dilations = obj_53_dilations_0, groups = obj_53_groups_0, pad = obj_53_pad_0, pad_type = obj_53_pad_type_0, strides = obj_53_strides_0, weight = layers_3_encoder_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("obj_53_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> out_23_axes_0 = const()[name = tensor<string, []>("out_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_896_to_fp16 = const()[name = tensor<string, []>("op_896_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_896_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("out_23_cast_fp16")];
+            tensor<fp16, [384]> input_35_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_35_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56745152)))];
+            tensor<fp16, [384]> input_35_beta_0_to_fp16 = const()[name = tensor<string, []>("input_35_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56745984)))];
+            tensor<fp16, []> input_35_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_35_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> input_37_pad_type_0 = const()[name = tensor<string, []>("input_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_37_strides_0 = const()[name = tensor<string, []>("input_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_37_pad_0 = const()[name = tensor<string, []>("input_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_37_dilations_0 = const()[name = tensor<string, []>("input_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_37_groups_0 = const()[name = tensor<string, []>("input_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> layers_3_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56746816)))];
+            tensor<fp16, [1536]> layers_3_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57926528)))];
+            tensor<fp16, [1, 1536, 1, 1]> input_37_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = layers_3_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_37_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> hidden_states_9_pad_type_0 = const()[name = tensor<string, []>("hidden_states_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> hidden_states_9_strides_0 = const()[name = tensor<string, []>("hidden_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> hidden_states_9_pad_0 = const()[name = tensor<string, []>("hidden_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> hidden_states_9_dilations_0 = const()[name = tensor<string, []>("hidden_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> hidden_states_9_groups_0 = const()[name = tensor<string, []>("hidden_states_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> layers_3_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57929664)))];
+            tensor<fp16, [384]> layers_3_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59109376)))];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_9_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_3_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> out_axes_0 = const()[name = tensor<string, []>("out_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, []> var_939_to_fp16 = const()[name = tensor<string, []>("op_939_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_939_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
+            tensor<fp16, [384]> hidden_states_gamma_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59110208)))];
+            tensor<fp16, [384]> hidden_states_beta_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59111040)))];
+            tensor<fp16, []> hidden_states_epsilon_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
+            tensor<int32, [1]> var_950_axes_0 = const()[name = tensor<string, []>("op_950_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 384, 1]> var_950_cast_fp16 = squeeze(axes = var_950_axes_0, x = hidden_states_cast_fp16)[name = tensor<string, []>("op_950_cast_fp16")];
+            tensor<int32, [3]> var_953_perm_0 = const()[name = tensor<string, []>("op_953_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [51864]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51864]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59111872)))];
+            tensor<fp16, [1, 1, 384]> var_953_cast_fp16 = transpose(perm = var_953_perm_0, x = var_950_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp16, [1, 1, 51864]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_953_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
+            tensor<int32, []> var_957 = const()[name = tensor<string, []>("op_957"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_59_interleave_0 = const()[name = tensor<string, []>("obj_59_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1536, 1, 1]> key_cache_updates = concat(axis = var_957, interleave = obj_59_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_cast_fp16))[name = tensor<string, []>("obj_59_cast_fp16")];
+            tensor<int32, []> var_960 = const()[name = tensor<string, []>("op_960"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_61_interleave_0 = const()[name = tensor<string, []>("obj_61_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1536, 1, 1]> value_cache_updates = concat(axis = var_960, interleave = obj_61_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_cast_fp16))[name = tensor<string, []>("obj_61_cast_fp16")];
+            tensor<int32, [4]> var_971_begin_0 = const()[name = tensor<string, []>("op_971_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_971_end_0 = const()[name = tensor<string, []>("op_971_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_971_end_mask_0 = const()[name = tensor<string, []>("op_971_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_971_cast_fp16 = slice_by_index(begin = var_971_begin_0, end = var_971_end_0, end_mask = var_971_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_971_cast_fp16")];
+            tensor<int32, [4]> var_974_begin_0 = const()[name = tensor<string, []>("op_974_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_974_end_0 = const()[name = tensor<string, []>("op_974_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_974_end_mask_0 = const()[name = tensor<string, []>("op_974_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_974_squeeze_mask_0 = const()[name = tensor<string, []>("op_974_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_974_cast_fp16 = slice_by_index(begin = var_974_begin_0, end = var_974_end_0, end_mask = var_974_end_mask_0, squeeze_mask = var_974_squeeze_mask_0, x = var_971_cast_fp16)[name = tensor<string, []>("op_974_cast_fp16")];
+            tensor<int32, [4]> var_989_begin_0 = const()[name = tensor<string, []>("op_989_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_989_end_0 = const()[name = tensor<string, []>("op_989_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_989_end_mask_0 = const()[name = tensor<string, []>("op_989_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_989_cast_fp16 = slice_by_index(begin = var_989_begin_0, end = var_989_end_0, end_mask = var_989_end_mask_0, x = obj_41_cast_fp16)[name = tensor<string, []>("op_989_cast_fp16")];
+            tensor<int32, [4]> var_992_begin_0 = const()[name = tensor<string, []>("op_992_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_992_end_0 = const()[name = tensor<string, []>("op_992_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_992_end_mask_0 = const()[name = tensor<string, []>("op_992_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_992_squeeze_mask_0 = const()[name = tensor<string, []>("op_992_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, squeeze_mask = var_992_squeeze_mask_0, x = var_989_cast_fp16)[name = tensor<string, []>("op_992_cast_fp16")];
+            tensor<int32, [4]> var_1007_begin_0 = const()[name = tensor<string, []>("op_1007_begin_0"), val = tensor<int32, [4]>([0, 5, 0, 0])];
+            tensor<int32, [4]> var_1007_end_0 = const()[name = tensor<string, []>("op_1007_end_0"), val = tensor<int32, [4]>([1, 6, 1, 1500])];
+            tensor<bool, [4]> var_1007_end_mask_0 = const()[name = tensor<string, []>("op_1007_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = obj_41_cast_fp16)[name = tensor<string, []>("op_1007_cast_fp16")];
+            tensor<int32, [4]> var_1010_begin_0 = const()[name = tensor<string, []>("op_1010_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1010_end_0 = const()[name = tensor<string, []>("op_1010_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1010_end_mask_0 = const()[name = tensor<string, []>("op_1010_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1010_squeeze_mask_0 = const()[name = tensor<string, []>("op_1010_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1010_cast_fp16 = slice_by_index(begin = var_1010_begin_0, end = var_1010_end_0, end_mask = var_1010_end_mask_0, squeeze_mask = var_1010_squeeze_mask_0, x = var_1007_cast_fp16)[name = tensor<string, []>("op_1010_cast_fp16")];
+            tensor<int32, [4]> var_1025_begin_0 = const()[name = tensor<string, []>("op_1025_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1025_end_0 = const()[name = tensor<string, []>("op_1025_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1025_end_mask_0 = const()[name = tensor<string, []>("op_1025_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1025_cast_fp16 = slice_by_index(begin = var_1025_begin_0, end = var_1025_end_0, end_mask = var_1025_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1025_cast_fp16")];
+            tensor<int32, [4]> var_1028_begin_0 = const()[name = tensor<string, []>("op_1028_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1028_end_0 = const()[name = tensor<string, []>("op_1028_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1028_end_mask_0 = const()[name = tensor<string, []>("op_1028_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1028_squeeze_mask_0 = const()[name = tensor<string, []>("op_1028_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1028_cast_fp16 = slice_by_index(begin = var_1028_begin_0, end = var_1028_end_0, end_mask = var_1028_end_mask_0, squeeze_mask = var_1028_squeeze_mask_0, x = var_1025_cast_fp16)[name = tensor<string, []>("op_1028_cast_fp16")];
+            tensor<int32, [4]> var_1043_begin_0 = const()[name = tensor<string, []>("op_1043_begin_0"), val = tensor<int32, [4]>([0, 1, 0, 0])];
+            tensor<int32, [4]> var_1043_end_0 = const()[name = tensor<string, []>("op_1043_end_0"), val = tensor<int32, [4]>([1, 2, 1, 1500])];
+            tensor<bool, [4]> var_1043_end_mask_0 = const()[name = tensor<string, []>("op_1043_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1043_cast_fp16 = slice_by_index(begin = var_1043_begin_0, end = var_1043_end_0, end_mask = var_1043_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1043_cast_fp16")];
+            tensor<int32, [4]> var_1046_begin_0 = const()[name = tensor<string, []>("op_1046_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1046_end_0 = const()[name = tensor<string, []>("op_1046_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1046_end_mask_0 = const()[name = tensor<string, []>("op_1046_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1046_squeeze_mask_0 = const()[name = tensor<string, []>("op_1046_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1046_cast_fp16 = slice_by_index(begin = var_1046_begin_0, end = var_1046_end_0, end_mask = var_1046_end_mask_0, squeeze_mask = var_1046_squeeze_mask_0, x = var_1043_cast_fp16)[name = tensor<string, []>("op_1046_cast_fp16")];
+            tensor<int32, [4]> var_1061_begin_0 = const()[name = tensor<string, []>("op_1061_begin_0"), val = tensor<int32, [4]>([0, 2, 0, 0])];
+            tensor<int32, [4]> var_1061_end_0 = const()[name = tensor<string, []>("op_1061_end_0"), val = tensor<int32, [4]>([1, 3, 1, 1500])];
+            tensor<bool, [4]> var_1061_end_mask_0 = const()[name = tensor<string, []>("op_1061_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1061_cast_fp16 = slice_by_index(begin = var_1061_begin_0, end = var_1061_end_0, end_mask = var_1061_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1061_cast_fp16")];
+            tensor<int32, [4]> var_1064_begin_0 = const()[name = tensor<string, []>("op_1064_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1064_end_0 = const()[name = tensor<string, []>("op_1064_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1064_end_mask_0 = const()[name = tensor<string, []>("op_1064_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1064_squeeze_mask_0 = const()[name = tensor<string, []>("op_1064_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1064_cast_fp16 = slice_by_index(begin = var_1064_begin_0, end = var_1064_end_0, end_mask = var_1064_end_mask_0, squeeze_mask = var_1064_squeeze_mask_0, x = var_1061_cast_fp16)[name = tensor<string, []>("op_1064_cast_fp16")];
+            tensor<int32, [4]> var_1079_begin_0 = const()[name = tensor<string, []>("op_1079_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_1079_end_0 = const()[name = tensor<string, []>("op_1079_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1500])];
+            tensor<bool, [4]> var_1079_end_mask_0 = const()[name = tensor<string, []>("op_1079_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1079_cast_fp16 = slice_by_index(begin = var_1079_begin_0, end = var_1079_end_0, end_mask = var_1079_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1079_cast_fp16")];
+            tensor<int32, [4]> var_1082_begin_0 = const()[name = tensor<string, []>("op_1082_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1082_end_0 = const()[name = tensor<string, []>("op_1082_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1082_end_mask_0 = const()[name = tensor<string, []>("op_1082_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1082_squeeze_mask_0 = const()[name = tensor<string, []>("op_1082_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1082_cast_fp16 = slice_by_index(begin = var_1082_begin_0, end = var_1082_end_0, end_mask = var_1082_end_mask_0, squeeze_mask = var_1082_squeeze_mask_0, x = var_1079_cast_fp16)[name = tensor<string, []>("op_1082_cast_fp16")];
+            tensor<int32, [4]> var_1097_begin_0 = const()[name = tensor<string, []>("op_1097_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_1097_end_0 = const()[name = tensor<string, []>("op_1097_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1500])];
+            tensor<bool, [4]> var_1097_end_mask_0 = const()[name = tensor<string, []>("op_1097_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1097_cast_fp16 = slice_by_index(begin = var_1097_begin_0, end = var_1097_end_0, end_mask = var_1097_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1097_cast_fp16")];
+            tensor<int32, [4]> var_1100_begin_0 = const()[name = tensor<string, []>("op_1100_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1100_end_0 = const()[name = tensor<string, []>("op_1100_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1100_end_mask_0 = const()[name = tensor<string, []>("op_1100_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1100_squeeze_mask_0 = const()[name = tensor<string, []>("op_1100_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1100_cast_fp16 = slice_by_index(begin = var_1100_begin_0, end = var_1100_end_0, end_mask = var_1100_end_mask_0, squeeze_mask = var_1100_squeeze_mask_0, x = var_1097_cast_fp16)[name = tensor<string, []>("op_1100_cast_fp16")];
+            tensor<int32, []> var_1107 = const()[name = tensor<string, []>("op_1107"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1108_interleave_0 = const()[name = tensor<string, []>("op_1108_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 8, 1500]> var_1108_cast_fp16 = concat(axis = var_1107, interleave = var_1108_interleave_0, values = (var_974_cast_fp16, var_992_cast_fp16, var_1010_cast_fp16, var_1028_cast_fp16, var_1046_cast_fp16, var_1064_cast_fp16, var_1082_cast_fp16, var_1100_cast_fp16))[name = tensor<string, []>("op_1108_cast_fp16")];
+            tensor<bool, []> var_1111 = const()[name = tensor<string, []>("op_1111"), val = tensor<bool, []>(false)];
+            tensor<int32, [1]> obj_axes_0 = const()[name = tensor<string, []>("obj_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1500]> alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_1111, x = var_1108_cast_fp16)[name = tensor<string, []>("obj_cast_fp16")];
+        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
+}
\ No newline at end of file
diff --git a/openai_whisper-tiny.en/TextDecoder.mlmodelc/model.mlmodel b/openai_whisper-tiny.en/TextDecoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..329f2902b7357215ee8169e998681736a46e099a
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c3e91bc036014426708e2ceb0e35cb1bbbf34e8121d2070d2b174a7957581d0
+size 108558
diff --git a/openai_whisper-tiny.en/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-tiny.en/TextDecoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..728f49d97ea9837951270943de169c532ce106f7
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:763f915f0126093fc2c506572b3ab0fad134c04cfc2221333ccc7d73552c9252
+size 59215664
diff --git a/openai_whisper-tiny.en/TextDecoder.mlpackage/Data/com.apple.CoreML/model.mlmodel b/openai_whisper-tiny.en/TextDecoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..329f2902b7357215ee8169e998681736a46e099a
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c3e91bc036014426708e2ceb0e35cb1bbbf34e8121d2070d2b174a7957581d0
+size 108558
diff --git a/openai_whisper-tiny.en/TextDecoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin b/openai_whisper-tiny.en/TextDecoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e3d17ef17cfad70b70c77cfb1cbf8ade623dd357
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa0a312346dc9aa97fab50e99d820c997968dfddd62695878086ba87795ca126
+size 59215664
diff --git a/openai_whisper-tiny.en/TextDecoder.mlpackage/Manifest.json b/openai_whisper-tiny.en/TextDecoder.mlpackage/Manifest.json
new file mode 100644
index 0000000000000000000000000000000000000000..a80c3e00acbedf2c96524f38f92b41bcc64d593a
--- /dev/null
+++ b/openai_whisper-tiny.en/TextDecoder.mlpackage/Manifest.json
@@ -0,0 +1,18 @@
+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "4F20E9B0-3984-4E42-84AA-8BBF92A2B7B2": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "B01071E1-B67E-4E85-830E-5EDD7EE041A2": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "4F20E9B0-3984-4E42-84AA-8BBF92A2B7B2"
+}
diff --git a/openai_whisper-tiny.en/config.json b/openai_whisper-tiny.en/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ecb97268374da7ff2954d163efecb1a45edf993a
--- /dev/null
+++ b/openai_whisper-tiny.en/config.json
@@ -0,0 +1 @@
+{"_name_or_path": "openai/whisper-tiny.en", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50256], "bos_token_id": 50257, "d_model": 384, "decoder_attention_heads": 6, "decoder_ffn_dim": 1536, "decoder_layerdrop": 0.0, "decoder_layers": 4, "decoder_start_token_id": 50257, "dropout": 0.0, "encoder_attention_heads": 6, "encoder_ffn_dim": 1536, "encoder_layerdrop": 0.0, "encoder_layers": 4, "eos_token_id": 50256, "forced_decoder_ids": [[1, 50362]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": "whisper", "num_hidden_layers": 4, "num_mel_bins": 80, "pad_token_id": 50256, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 357, 366, 438, 532, 685, 705, 796, 930, 1058, 1220, 1267, 1279, 1303, 1343, 1377, 1391, 1635, 1782, 1875, 2162, 2361, 2488, 3467, 4008, 4211, 4600, 4808, 5299, 5855, 6329, 7203, 9609, 9959, 10563, 10786, 11420, 11709, 11907, 13163, 13697, 13700, 14808, 15306, 16410, 16791, 17992, 19203, 19510, 20724, 22305, 22935, 27007, 30109, 30420, 33409, 34949, 40283, 40493, 40549, 47282, 49146, 50257, 50357, 50358, 50359, 50360, 50361], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51864}
\ No newline at end of file
diff --git a/openai_whisper-tiny.en/generation_config.json b/openai_whisper-tiny.en/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..08b7d03d8fcb3637053486e7149491feeb613e2d
--- /dev/null
+++ b/openai_whisper-tiny.en/generation_config.json
@@ -0,0 +1 @@
+{"alignment_heads": [[1, 0], [2, 0], [2, 5], [3, 0], [3, 1], [3, 2], [3, 3], [3, 4]], "begin_suppress_tokens": [220, 50256], "bos_token_id": 50257, "decoder_start_token_id": 50257, "eos_token_id": 50256, "forced_decoder_ids": [[1, 50362]], "is_multilingual": false, "max_initial_timestamp_index": 50, "max_length": 448, "no_timestamps_token_id": 50362, "pad_token_id": 50256, "prev_sot_token_id": 50360, "return_timestamps": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 357, 366, 438, 532, 685, 705, 796, 930, 1058, 1220, 1267, 1279, 1303, 1343, 1377, 1391, 1635, 1782, 1875, 2162, 2361, 2488, 3467, 4008, 4211, 4600, 4808, 5299, 5855, 6329, 7203, 9609, 9959, 10563, 10786, 11420, 11709, 11907, 13163, 13697, 13700, 14808, 15306, 16410, 16791, 17992, 19203, 19510, 20724, 22305, 22935, 27007, 30109, 30420, 33409, 34949, 40283, 40493, 40549, 47282, 49146, 50257, 50357, 50358, 50359, 50360, 50361], "transformers_version": "4.31.0.dev0"}
\ No newline at end of file
diff --git a/openai_whisper-tiny/AudioEncoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-tiny/AudioEncoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..056c9abe34fdef5d58f2823cd90234c01fdfa023
--- /dev/null
+++ b/openai_whisper-tiny/AudioEncoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b25820e5b2ab0b0686b4bea147fb217d1d1bface45170ff4ffde01fa6864ae2
+size 243
diff --git a/openai_whisper-tiny/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-tiny/AudioEncoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1428dd9bfb3229b29f93802d6a6ea632d98edd2f
--- /dev/null
+++ b/openai_whisper-tiny/AudioEncoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:142c33ade402fe41952059f175eb855093dfe09b5d2b84624a31e3a9952ed47d
+size 347
diff --git a/openai_whisper-tiny/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-tiny/AudioEncoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d12ec69b6863471619779e0eebb88ef7e712fde
--- /dev/null
+++ b/openai_whisper-tiny/AudioEncoder.mlmodelc/metadata.json
@@ -0,0 +1,69 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 384 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 384, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 28,
+      "Ios16.rsqrt" : 9,
+      "Ios16.mul" : 114,
+      "SliceByIndex" : 168,
+      "Ios16.sub" : 9,
+      "Transpose" : 4,
+      "Ios16.einsum" : 192,
+      "Ios16.conv" : 26,
+      "Ios16.add" : 18,
+      "Ios16.reduceMean" : 18,
+      "Ios16.softmax" : 96,
+      "Ios16.gelu" : 6,
+      "Ios16.batchNorm" : 9
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.1",
+      "com.github.apple.coremltools.version" : "7.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "AudioEncoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-tiny/AudioEncoder.mlmodelc/model.mil b/openai_whisper-tiny/AudioEncoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..dc9e1781fbb97a55017e33b13dc3b7d71d7e03cd
--- /dev/null
+++ b/openai_whisper-tiny/AudioEncoder.mlmodelc/model.mil
@@ -0,0 +1,1766 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})]
+{
+    func main<ios16>(tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features) {
+            tensor<int32, [2]> var_34 = const()[name = tensor<string, []>("op_34"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_40 = const()[name = tensor<string, []>("op_40"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_45 = const()[name = tensor<string, []>("op_45"), val = tensor<int32, []>(1)];
+            tensor<string, []> var_50_pad_type_0 = const()[name = tensor<string, []>("op_50_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_50_pad_0 = const()[name = tensor<string, []>("op_50_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<fp16, [384, 80, 1, 3]> var_25_to_fp16 = const()[name = tensor<string, []>("op_25_to_fp16"), val = tensor<fp16, [384, 80, 1, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [384]> var_31_to_fp16 = const()[name = tensor<string, []>("op_31_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184448)))];
+            tensor<fp16, [1, 384, 1, 3000]> var_50_cast_fp16 = conv(bias = var_31_to_fp16, dilations = var_40, groups = var_45, pad = var_50_pad_0, pad_type = var_50_pad_type_0, strides = var_34, weight = var_25_to_fp16, x = melspectrogram_features)[name = tensor<string, []>("op_50_cast_fp16")];
+            tensor<string, []> hidden_states_1_mode_0 = const()[name = tensor<string, []>("hidden_states_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 384, 1, 3000]> hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_50_cast_fp16)[name = tensor<string, []>("hidden_states_1_cast_fp16")];
+            tensor<int32, [2]> var_74 = const()[name = tensor<string, []>("op_74"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_80 = const()[name = tensor<string, []>("op_80"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_85 = const()[name = tensor<string, []>("op_85"), val = tensor<int32, []>(1)];
+            tensor<string, []> var_90_pad_type_0 = const()[name = tensor<string, []>("op_90_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_90_pad_0 = const()[name = tensor<string, []>("op_90_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<fp16, [384, 384, 1, 3]> var_65_to_fp16 = const()[name = tensor<string, []>("op_65_to_fp16"), val = tensor<fp16, [384, 384, 1, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185280)))];
+            tensor<fp16, [384]> var_71_to_fp16 = const()[name = tensor<string, []>("op_71_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1070080)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_90_cast_fp16 = conv(bias = var_71_to_fp16, dilations = var_80, groups = var_85, pad = var_90_pad_0, pad_type = var_90_pad_type_0, strides = var_74, weight = var_65_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor<string, []>("op_90_cast_fp16")];
+            tensor<string, []> hidden_states_3_mode_0 = const()[name = tensor<string, []>("hidden_states_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_90_cast_fp16)[name = tensor<string, []>("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> var_108_to_fp16 = const()[name = tensor<string, []>("op_108_to_fp16"), val = tensor<fp16, [1, 384, 1, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1070912)))];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_108_to_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_118 = const()[name = tensor<string, []>("op_118"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_129 = const()[name = tensor<string, []>("op_129"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_130 = const()[name = tensor<string, []>("op_130"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_140 = const()[name = tensor<string, []>("op_140"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_1_cast_fp16 = reduce_mean(axes = var_140, keep_dims = var_130, x = inputs_1_cast_fp16)[name = tensor<string, []>("channels_mean_1_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor<string, []>("zero_mean_1_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor<string, []>("zero_mean_sq_1_cast_fp16")];
+            tensor<int32, [1]> var_144 = const()[name = tensor<string, []>("op_144"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_145_cast_fp16 = reduce_mean(axes = var_144, keep_dims = var_130, x = zero_mean_sq_1_cast_fp16)[name = tensor<string, []>("op_145_cast_fp16")];
+            tensor<fp16, []> var_146_to_fp16 = const()[name = tensor<string, []>("op_146_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_147_cast_fp16 = add(x = var_145_cast_fp16, y = var_146_to_fp16)[name = tensor<string, []>("op_147_cast_fp16")];
+            tensor<fp16, []> denom_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, x = var_147_cast_fp16)[name = tensor<string, []>("denom_1_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
+            tensor<fp16, [384]> obj_1_mean_0_to_fp16 = const()[name = tensor<string, []>("obj_1_mean_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2222976)))];
+            tensor<fp16, [384]> obj_1_variance_0_to_fp16 = const()[name = tensor<string, []>("obj_1_variance_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2223808)))];
+            tensor<fp16, [384]> obj_1_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2224640)))];
+            tensor<fp16, [384]> obj_1_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_1_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2225472)))];
+            tensor<fp16, []> obj_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor<string, []>("obj_1_cast_fp16")];
+            tensor<int32, [2]> var_162 = const()[name = tensor<string, []>("op_162"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_164 = const()[name = tensor<string, []>("op_164"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_1_pad_type_0 = const()[name = tensor<string, []>("query_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_1_pad_0 = const()[name = tensor<string, []>("query_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2226304)))];
+            tensor<fp16, [384]> layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2521280)))];
+            tensor<fp16, [1, 384, 1, 1500]> query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_164, groups = var_129, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_162, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("query_1_cast_fp16")];
+            tensor<int32, [2]> var_168 = const()[name = tensor<string, []>("op_168"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_170 = const()[name = tensor<string, []>("op_170"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_1_pad_type_0 = const()[name = tensor<string, []>("key_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_1_pad_0 = const()[name = tensor<string, []>("key_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2522112)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_1_cast_fp16 = conv(dilations = var_170, groups = var_129, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = var_168, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("key_1_cast_fp16")];
+            tensor<int32, [2]> var_175 = const()[name = tensor<string, []>("op_175"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_177 = const()[name = tensor<string, []>("op_177"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_1_pad_type_0 = const()[name = tensor<string, []>("value_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_1_pad_0 = const()[name = tensor<string, []>("value_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2817088)))];
+            tensor<fp16, [384]> layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3112064)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_177, groups = var_129, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = var_175, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("value_1_cast_fp16")];
+            tensor<int32, [4]> var_184_begin_0 = const()[name = tensor<string, []>("op_184_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_184_end_0 = const()[name = tensor<string, []>("op_184_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_184_end_mask_0 = const()[name = tensor<string, []>("op_184_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_184_cast_fp16 = slice_by_index(begin = var_184_begin_0, end = var_184_end_0, end_mask = var_184_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_184_cast_fp16")];
+            tensor<int32, [4]> var_188_begin_0 = const()[name = tensor<string, []>("op_188_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_188_end_0 = const()[name = tensor<string, []>("op_188_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_188_end_mask_0 = const()[name = tensor<string, []>("op_188_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_188_cast_fp16 = slice_by_index(begin = var_188_begin_0, end = var_188_end_0, end_mask = var_188_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_188_cast_fp16")];
+            tensor<int32, [4]> var_192_begin_0 = const()[name = tensor<string, []>("op_192_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_192_end_0 = const()[name = tensor<string, []>("op_192_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_192_end_mask_0 = const()[name = tensor<string, []>("op_192_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_192_cast_fp16 = slice_by_index(begin = var_192_begin_0, end = var_192_end_0, end_mask = var_192_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_192_cast_fp16")];
+            tensor<int32, [4]> var_196_begin_0 = const()[name = tensor<string, []>("op_196_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_196_end_0 = const()[name = tensor<string, []>("op_196_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_196_end_mask_0 = const()[name = tensor<string, []>("op_196_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_196_cast_fp16 = slice_by_index(begin = var_196_begin_0, end = var_196_end_0, end_mask = var_196_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_196_cast_fp16")];
+            tensor<int32, [4]> var_200_begin_0 = const()[name = tensor<string, []>("op_200_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_200_end_0 = const()[name = tensor<string, []>("op_200_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_200_end_mask_0 = const()[name = tensor<string, []>("op_200_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_200_cast_fp16 = slice_by_index(begin = var_200_begin_0, end = var_200_end_0, end_mask = var_200_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_200_cast_fp16")];
+            tensor<int32, [4]> var_204_begin_0 = const()[name = tensor<string, []>("op_204_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_204_end_0 = const()[name = tensor<string, []>("op_204_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_204_end_mask_0 = const()[name = tensor<string, []>("op_204_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_204_cast_fp16 = slice_by_index(begin = var_204_begin_0, end = var_204_end_0, end_mask = var_204_end_mask_0, x = query_1_cast_fp16)[name = tensor<string, []>("op_204_cast_fp16")];
+            tensor<int32, [4]> var_213_begin_0 = const()[name = tensor<string, []>("op_213_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_213_end_0 = const()[name = tensor<string, []>("op_213_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_213_end_mask_0 = const()[name = tensor<string, []>("op_213_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_213_cast_fp16 = slice_by_index(begin = var_213_begin_0, end = var_213_end_0, end_mask = var_213_end_mask_0, x = var_184_cast_fp16)[name = tensor<string, []>("op_213_cast_fp16")];
+            tensor<int32, [4]> var_220_begin_0 = const()[name = tensor<string, []>("op_220_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_220_end_0 = const()[name = tensor<string, []>("op_220_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_220_end_mask_0 = const()[name = tensor<string, []>("op_220_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_220_cast_fp16 = slice_by_index(begin = var_220_begin_0, end = var_220_end_0, end_mask = var_220_end_mask_0, x = var_184_cast_fp16)[name = tensor<string, []>("op_220_cast_fp16")];
+            tensor<int32, [4]> var_227_begin_0 = const()[name = tensor<string, []>("op_227_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_227_end_0 = const()[name = tensor<string, []>("op_227_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_227_end_mask_0 = const()[name = tensor<string, []>("op_227_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = var_184_cast_fp16)[name = tensor<string, []>("op_227_cast_fp16")];
+            tensor<int32, [4]> var_234_begin_0 = const()[name = tensor<string, []>("op_234_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_234_end_0 = const()[name = tensor<string, []>("op_234_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_234_end_mask_0 = const()[name = tensor<string, []>("op_234_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = var_184_cast_fp16)[name = tensor<string, []>("op_234_cast_fp16")];
+            tensor<int32, [4]> var_241_begin_0 = const()[name = tensor<string, []>("op_241_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_241_end_0 = const()[name = tensor<string, []>("op_241_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_241_end_mask_0 = const()[name = tensor<string, []>("op_241_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_241_cast_fp16 = slice_by_index(begin = var_241_begin_0, end = var_241_end_0, end_mask = var_241_end_mask_0, x = var_188_cast_fp16)[name = tensor<string, []>("op_241_cast_fp16")];
+            tensor<int32, [4]> var_248_begin_0 = const()[name = tensor<string, []>("op_248_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_248_end_0 = const()[name = tensor<string, []>("op_248_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_248_end_mask_0 = const()[name = tensor<string, []>("op_248_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_248_cast_fp16 = slice_by_index(begin = var_248_begin_0, end = var_248_end_0, end_mask = var_248_end_mask_0, x = var_188_cast_fp16)[name = tensor<string, []>("op_248_cast_fp16")];
+            tensor<int32, [4]> var_255_begin_0 = const()[name = tensor<string, []>("op_255_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_255_end_0 = const()[name = tensor<string, []>("op_255_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_255_end_mask_0 = const()[name = tensor<string, []>("op_255_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = var_188_cast_fp16)[name = tensor<string, []>("op_255_cast_fp16")];
+            tensor<int32, [4]> var_262_begin_0 = const()[name = tensor<string, []>("op_262_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_262_end_0 = const()[name = tensor<string, []>("op_262_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_262_end_mask_0 = const()[name = tensor<string, []>("op_262_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = var_188_cast_fp16)[name = tensor<string, []>("op_262_cast_fp16")];
+            tensor<int32, [4]> var_269_begin_0 = const()[name = tensor<string, []>("op_269_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_269_end_0 = const()[name = tensor<string, []>("op_269_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_269_end_mask_0 = const()[name = tensor<string, []>("op_269_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_269_cast_fp16 = slice_by_index(begin = var_269_begin_0, end = var_269_end_0, end_mask = var_269_end_mask_0, x = var_192_cast_fp16)[name = tensor<string, []>("op_269_cast_fp16")];
+            tensor<int32, [4]> var_276_begin_0 = const()[name = tensor<string, []>("op_276_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_276_end_0 = const()[name = tensor<string, []>("op_276_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_276_end_mask_0 = const()[name = tensor<string, []>("op_276_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_276_cast_fp16 = slice_by_index(begin = var_276_begin_0, end = var_276_end_0, end_mask = var_276_end_mask_0, x = var_192_cast_fp16)[name = tensor<string, []>("op_276_cast_fp16")];
+            tensor<int32, [4]> var_283_begin_0 = const()[name = tensor<string, []>("op_283_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_283_end_0 = const()[name = tensor<string, []>("op_283_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_283_end_mask_0 = const()[name = tensor<string, []>("op_283_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = var_192_cast_fp16)[name = tensor<string, []>("op_283_cast_fp16")];
+            tensor<int32, [4]> var_290_begin_0 = const()[name = tensor<string, []>("op_290_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_290_end_0 = const()[name = tensor<string, []>("op_290_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_290_end_mask_0 = const()[name = tensor<string, []>("op_290_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = var_192_cast_fp16)[name = tensor<string, []>("op_290_cast_fp16")];
+            tensor<int32, [4]> var_297_begin_0 = const()[name = tensor<string, []>("op_297_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_297_end_0 = const()[name = tensor<string, []>("op_297_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_297_end_mask_0 = const()[name = tensor<string, []>("op_297_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_297_cast_fp16 = slice_by_index(begin = var_297_begin_0, end = var_297_end_0, end_mask = var_297_end_mask_0, x = var_196_cast_fp16)[name = tensor<string, []>("op_297_cast_fp16")];
+            tensor<int32, [4]> var_304_begin_0 = const()[name = tensor<string, []>("op_304_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_304_end_0 = const()[name = tensor<string, []>("op_304_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_304_end_mask_0 = const()[name = tensor<string, []>("op_304_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_304_cast_fp16 = slice_by_index(begin = var_304_begin_0, end = var_304_end_0, end_mask = var_304_end_mask_0, x = var_196_cast_fp16)[name = tensor<string, []>("op_304_cast_fp16")];
+            tensor<int32, [4]> var_311_begin_0 = const()[name = tensor<string, []>("op_311_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_311_end_0 = const()[name = tensor<string, []>("op_311_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_311_end_mask_0 = const()[name = tensor<string, []>("op_311_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_311_cast_fp16 = slice_by_index(begin = var_311_begin_0, end = var_311_end_0, end_mask = var_311_end_mask_0, x = var_196_cast_fp16)[name = tensor<string, []>("op_311_cast_fp16")];
+            tensor<int32, [4]> var_318_begin_0 = const()[name = tensor<string, []>("op_318_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_318_end_0 = const()[name = tensor<string, []>("op_318_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_318_end_mask_0 = const()[name = tensor<string, []>("op_318_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = var_196_cast_fp16)[name = tensor<string, []>("op_318_cast_fp16")];
+            tensor<int32, [4]> var_325_begin_0 = const()[name = tensor<string, []>("op_325_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_325_end_0 = const()[name = tensor<string, []>("op_325_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_325_end_mask_0 = const()[name = tensor<string, []>("op_325_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = var_200_cast_fp16)[name = tensor<string, []>("op_325_cast_fp16")];
+            tensor<int32, [4]> var_332_begin_0 = const()[name = tensor<string, []>("op_332_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_332_end_0 = const()[name = tensor<string, []>("op_332_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_332_end_mask_0 = const()[name = tensor<string, []>("op_332_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_332_cast_fp16 = slice_by_index(begin = var_332_begin_0, end = var_332_end_0, end_mask = var_332_end_mask_0, x = var_200_cast_fp16)[name = tensor<string, []>("op_332_cast_fp16")];
+            tensor<int32, [4]> var_339_begin_0 = const()[name = tensor<string, []>("op_339_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_339_end_0 = const()[name = tensor<string, []>("op_339_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_339_end_mask_0 = const()[name = tensor<string, []>("op_339_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = var_200_cast_fp16)[name = tensor<string, []>("op_339_cast_fp16")];
+            tensor<int32, [4]> var_346_begin_0 = const()[name = tensor<string, []>("op_346_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_346_end_0 = const()[name = tensor<string, []>("op_346_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_346_end_mask_0 = const()[name = tensor<string, []>("op_346_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = var_346_end_0, end_mask = var_346_end_mask_0, x = var_200_cast_fp16)[name = tensor<string, []>("op_346_cast_fp16")];
+            tensor<int32, [4]> var_353_begin_0 = const()[name = tensor<string, []>("op_353_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_353_end_0 = const()[name = tensor<string, []>("op_353_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_353_end_mask_0 = const()[name = tensor<string, []>("op_353_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_353_cast_fp16 = slice_by_index(begin = var_353_begin_0, end = var_353_end_0, end_mask = var_353_end_mask_0, x = var_204_cast_fp16)[name = tensor<string, []>("op_353_cast_fp16")];
+            tensor<int32, [4]> var_360_begin_0 = const()[name = tensor<string, []>("op_360_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_360_end_0 = const()[name = tensor<string, []>("op_360_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_360_end_mask_0 = const()[name = tensor<string, []>("op_360_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = var_204_cast_fp16)[name = tensor<string, []>("op_360_cast_fp16")];
+            tensor<int32, [4]> var_367_begin_0 = const()[name = tensor<string, []>("op_367_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_367_end_0 = const()[name = tensor<string, []>("op_367_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_367_end_mask_0 = const()[name = tensor<string, []>("op_367_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = var_367_end_0, end_mask = var_367_end_mask_0, x = var_204_cast_fp16)[name = tensor<string, []>("op_367_cast_fp16")];
+            tensor<int32, [4]> var_374_begin_0 = const()[name = tensor<string, []>("op_374_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_374_end_0 = const()[name = tensor<string, []>("op_374_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_374_end_mask_0 = const()[name = tensor<string, []>("op_374_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = var_374_end_0, end_mask = var_374_end_mask_0, x = var_204_cast_fp16)[name = tensor<string, []>("op_374_cast_fp16")];
+            tensor<int32, [4]> k_1_perm_0 = const()[name = tensor<string, []>("k_1_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_379_begin_0 = const()[name = tensor<string, []>("op_379_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_379_end_0 = const()[name = tensor<string, []>("op_379_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_379_end_mask_0 = const()[name = tensor<string, []>("op_379_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 384]> transpose_3 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_379_cast_fp16 = slice_by_index(begin = var_379_begin_0, end = var_379_end_0, end_mask = var_379_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_379_cast_fp16")];
+            tensor<int32, [4]> var_383_begin_0 = const()[name = tensor<string, []>("op_383_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_383_end_0 = const()[name = tensor<string, []>("op_383_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_383_end_mask_0 = const()[name = tensor<string, []>("op_383_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_383_cast_fp16")];
+            tensor<int32, [4]> var_387_begin_0 = const()[name = tensor<string, []>("op_387_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_387_end_0 = const()[name = tensor<string, []>("op_387_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_387_end_mask_0 = const()[name = tensor<string, []>("op_387_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_387_cast_fp16 = slice_by_index(begin = var_387_begin_0, end = var_387_end_0, end_mask = var_387_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_387_cast_fp16")];
+            tensor<int32, [4]> var_391_begin_0 = const()[name = tensor<string, []>("op_391_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_391_end_0 = const()[name = tensor<string, []>("op_391_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_391_end_mask_0 = const()[name = tensor<string, []>("op_391_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_391_cast_fp16 = slice_by_index(begin = var_391_begin_0, end = var_391_end_0, end_mask = var_391_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_391_cast_fp16")];
+            tensor<int32, [4]> var_395_begin_0 = const()[name = tensor<string, []>("op_395_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_395_end_0 = const()[name = tensor<string, []>("op_395_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_395_end_mask_0 = const()[name = tensor<string, []>("op_395_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_395_cast_fp16")];
+            tensor<int32, [4]> var_399_begin_0 = const()[name = tensor<string, []>("op_399_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_399_end_0 = const()[name = tensor<string, []>("op_399_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_399_end_mask_0 = const()[name = tensor<string, []>("op_399_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = var_399_end_0, end_mask = var_399_end_mask_0, x = transpose_3)[name = tensor<string, []>("op_399_cast_fp16")];
+            tensor<int32, [4]> var_401_begin_0 = const()[name = tensor<string, []>("op_401_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_401_end_0 = const()[name = tensor<string, []>("op_401_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_401_end_mask_0 = const()[name = tensor<string, []>("op_401_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_401_cast_fp16 = slice_by_index(begin = var_401_begin_0, end = var_401_end_0, end_mask = var_401_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_401_cast_fp16")];
+            tensor<int32, [4]> var_405_begin_0 = const()[name = tensor<string, []>("op_405_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_405_end_0 = const()[name = tensor<string, []>("op_405_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_405_end_mask_0 = const()[name = tensor<string, []>("op_405_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_405_cast_fp16 = slice_by_index(begin = var_405_begin_0, end = var_405_end_0, end_mask = var_405_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_405_cast_fp16")];
+            tensor<int32, [4]> var_409_begin_0 = const()[name = tensor<string, []>("op_409_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_409_end_0 = const()[name = tensor<string, []>("op_409_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_409_end_mask_0 = const()[name = tensor<string, []>("op_409_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_409_cast_fp16 = slice_by_index(begin = var_409_begin_0, end = var_409_end_0, end_mask = var_409_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_409_cast_fp16")];
+            tensor<int32, [4]> var_413_begin_0 = const()[name = tensor<string, []>("op_413_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_413_end_0 = const()[name = tensor<string, []>("op_413_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_413_end_mask_0 = const()[name = tensor<string, []>("op_413_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_413_cast_fp16 = slice_by_index(begin = var_413_begin_0, end = var_413_end_0, end_mask = var_413_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_413_cast_fp16")];
+            tensor<int32, [4]> var_417_begin_0 = const()[name = tensor<string, []>("op_417_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_417_end_0 = const()[name = tensor<string, []>("op_417_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_417_end_mask_0 = const()[name = tensor<string, []>("op_417_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_417_cast_fp16 = slice_by_index(begin = var_417_begin_0, end = var_417_end_0, end_mask = var_417_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_417_cast_fp16")];
+            tensor<int32, [4]> var_421_begin_0 = const()[name = tensor<string, []>("op_421_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_421_end_0 = const()[name = tensor<string, []>("op_421_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_421_end_mask_0 = const()[name = tensor<string, []>("op_421_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_421_cast_fp16 = slice_by_index(begin = var_421_begin_0, end = var_421_end_0, end_mask = var_421_end_mask_0, x = value_1_cast_fp16)[name = tensor<string, []>("op_421_cast_fp16")];
+            tensor<string, []> var_425_equation_0 = const()[name = tensor<string, []>("op_425_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_425_cast_fp16 = einsum(equation = var_425_equation_0, values = (var_379_cast_fp16, var_213_cast_fp16))[name = tensor<string, []>("op_425_cast_fp16")];
+            tensor<fp16, []> var_426_to_fp16 = const()[name = tensor<string, []>("op_426_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_1_cast_fp16 = mul(x = var_425_cast_fp16, y = var_426_to_fp16)[name = tensor<string, []>("aw_chunk_1_cast_fp16")];
+            tensor<string, []> var_429_equation_0 = const()[name = tensor<string, []>("op_429_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_429_cast_fp16 = einsum(equation = var_429_equation_0, values = (var_379_cast_fp16, var_220_cast_fp16))[name = tensor<string, []>("op_429_cast_fp16")];
+            tensor<fp16, []> var_430_to_fp16 = const()[name = tensor<string, []>("op_430_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_3_cast_fp16 = mul(x = var_429_cast_fp16, y = var_430_to_fp16)[name = tensor<string, []>("aw_chunk_3_cast_fp16")];
+            tensor<string, []> var_433_equation_0 = const()[name = tensor<string, []>("op_433_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_433_cast_fp16 = einsum(equation = var_433_equation_0, values = (var_379_cast_fp16, var_227_cast_fp16))[name = tensor<string, []>("op_433_cast_fp16")];
+            tensor<fp16, []> var_434_to_fp16 = const()[name = tensor<string, []>("op_434_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_5_cast_fp16 = mul(x = var_433_cast_fp16, y = var_434_to_fp16)[name = tensor<string, []>("aw_chunk_5_cast_fp16")];
+            tensor<string, []> var_437_equation_0 = const()[name = tensor<string, []>("op_437_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_437_cast_fp16 = einsum(equation = var_437_equation_0, values = (var_379_cast_fp16, var_234_cast_fp16))[name = tensor<string, []>("op_437_cast_fp16")];
+            tensor<fp16, []> var_438_to_fp16 = const()[name = tensor<string, []>("op_438_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_7_cast_fp16 = mul(x = var_437_cast_fp16, y = var_438_to_fp16)[name = tensor<string, []>("aw_chunk_7_cast_fp16")];
+            tensor<string, []> var_441_equation_0 = const()[name = tensor<string, []>("op_441_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_441_cast_fp16 = einsum(equation = var_441_equation_0, values = (var_383_cast_fp16, var_241_cast_fp16))[name = tensor<string, []>("op_441_cast_fp16")];
+            tensor<fp16, []> var_442_to_fp16 = const()[name = tensor<string, []>("op_442_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_9_cast_fp16 = mul(x = var_441_cast_fp16, y = var_442_to_fp16)[name = tensor<string, []>("aw_chunk_9_cast_fp16")];
+            tensor<string, []> var_445_equation_0 = const()[name = tensor<string, []>("op_445_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_445_cast_fp16 = einsum(equation = var_445_equation_0, values = (var_383_cast_fp16, var_248_cast_fp16))[name = tensor<string, []>("op_445_cast_fp16")];
+            tensor<fp16, []> var_446_to_fp16 = const()[name = tensor<string, []>("op_446_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_11_cast_fp16 = mul(x = var_445_cast_fp16, y = var_446_to_fp16)[name = tensor<string, []>("aw_chunk_11_cast_fp16")];
+            tensor<string, []> var_449_equation_0 = const()[name = tensor<string, []>("op_449_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_449_cast_fp16 = einsum(equation = var_449_equation_0, values = (var_383_cast_fp16, var_255_cast_fp16))[name = tensor<string, []>("op_449_cast_fp16")];
+            tensor<fp16, []> var_450_to_fp16 = const()[name = tensor<string, []>("op_450_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_13_cast_fp16 = mul(x = var_449_cast_fp16, y = var_450_to_fp16)[name = tensor<string, []>("aw_chunk_13_cast_fp16")];
+            tensor<string, []> var_453_equation_0 = const()[name = tensor<string, []>("op_453_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_453_cast_fp16 = einsum(equation = var_453_equation_0, values = (var_383_cast_fp16, var_262_cast_fp16))[name = tensor<string, []>("op_453_cast_fp16")];
+            tensor<fp16, []> var_454_to_fp16 = const()[name = tensor<string, []>("op_454_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_15_cast_fp16 = mul(x = var_453_cast_fp16, y = var_454_to_fp16)[name = tensor<string, []>("aw_chunk_15_cast_fp16")];
+            tensor<string, []> var_457_equation_0 = const()[name = tensor<string, []>("op_457_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_457_cast_fp16 = einsum(equation = var_457_equation_0, values = (var_387_cast_fp16, var_269_cast_fp16))[name = tensor<string, []>("op_457_cast_fp16")];
+            tensor<fp16, []> var_458_to_fp16 = const()[name = tensor<string, []>("op_458_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_17_cast_fp16 = mul(x = var_457_cast_fp16, y = var_458_to_fp16)[name = tensor<string, []>("aw_chunk_17_cast_fp16")];
+            tensor<string, []> var_461_equation_0 = const()[name = tensor<string, []>("op_461_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_461_cast_fp16 = einsum(equation = var_461_equation_0, values = (var_387_cast_fp16, var_276_cast_fp16))[name = tensor<string, []>("op_461_cast_fp16")];
+            tensor<fp16, []> var_462_to_fp16 = const()[name = tensor<string, []>("op_462_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_19_cast_fp16 = mul(x = var_461_cast_fp16, y = var_462_to_fp16)[name = tensor<string, []>("aw_chunk_19_cast_fp16")];
+            tensor<string, []> var_465_equation_0 = const()[name = tensor<string, []>("op_465_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_465_cast_fp16 = einsum(equation = var_465_equation_0, values = (var_387_cast_fp16, var_283_cast_fp16))[name = tensor<string, []>("op_465_cast_fp16")];
+            tensor<fp16, []> var_466_to_fp16 = const()[name = tensor<string, []>("op_466_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_21_cast_fp16 = mul(x = var_465_cast_fp16, y = var_466_to_fp16)[name = tensor<string, []>("aw_chunk_21_cast_fp16")];
+            tensor<string, []> var_469_equation_0 = const()[name = tensor<string, []>("op_469_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_469_cast_fp16 = einsum(equation = var_469_equation_0, values = (var_387_cast_fp16, var_290_cast_fp16))[name = tensor<string, []>("op_469_cast_fp16")];
+            tensor<fp16, []> var_470_to_fp16 = const()[name = tensor<string, []>("op_470_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_23_cast_fp16 = mul(x = var_469_cast_fp16, y = var_470_to_fp16)[name = tensor<string, []>("aw_chunk_23_cast_fp16")];
+            tensor<string, []> var_473_equation_0 = const()[name = tensor<string, []>("op_473_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_473_cast_fp16 = einsum(equation = var_473_equation_0, values = (var_391_cast_fp16, var_297_cast_fp16))[name = tensor<string, []>("op_473_cast_fp16")];
+            tensor<fp16, []> var_474_to_fp16 = const()[name = tensor<string, []>("op_474_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_25_cast_fp16 = mul(x = var_473_cast_fp16, y = var_474_to_fp16)[name = tensor<string, []>("aw_chunk_25_cast_fp16")];
+            tensor<string, []> var_477_equation_0 = const()[name = tensor<string, []>("op_477_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_477_cast_fp16 = einsum(equation = var_477_equation_0, values = (var_391_cast_fp16, var_304_cast_fp16))[name = tensor<string, []>("op_477_cast_fp16")];
+            tensor<fp16, []> var_478_to_fp16 = const()[name = tensor<string, []>("op_478_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_27_cast_fp16 = mul(x = var_477_cast_fp16, y = var_478_to_fp16)[name = tensor<string, []>("aw_chunk_27_cast_fp16")];
+            tensor<string, []> var_481_equation_0 = const()[name = tensor<string, []>("op_481_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_481_cast_fp16 = einsum(equation = var_481_equation_0, values = (var_391_cast_fp16, var_311_cast_fp16))[name = tensor<string, []>("op_481_cast_fp16")];
+            tensor<fp16, []> var_482_to_fp16 = const()[name = tensor<string, []>("op_482_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_29_cast_fp16 = mul(x = var_481_cast_fp16, y = var_482_to_fp16)[name = tensor<string, []>("aw_chunk_29_cast_fp16")];
+            tensor<string, []> var_485_equation_0 = const()[name = tensor<string, []>("op_485_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_485_cast_fp16 = einsum(equation = var_485_equation_0, values = (var_391_cast_fp16, var_318_cast_fp16))[name = tensor<string, []>("op_485_cast_fp16")];
+            tensor<fp16, []> var_486_to_fp16 = const()[name = tensor<string, []>("op_486_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_31_cast_fp16 = mul(x = var_485_cast_fp16, y = var_486_to_fp16)[name = tensor<string, []>("aw_chunk_31_cast_fp16")];
+            tensor<string, []> var_489_equation_0 = const()[name = tensor<string, []>("op_489_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_489_cast_fp16 = einsum(equation = var_489_equation_0, values = (var_395_cast_fp16, var_325_cast_fp16))[name = tensor<string, []>("op_489_cast_fp16")];
+            tensor<fp16, []> var_490_to_fp16 = const()[name = tensor<string, []>("op_490_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_33_cast_fp16 = mul(x = var_489_cast_fp16, y = var_490_to_fp16)[name = tensor<string, []>("aw_chunk_33_cast_fp16")];
+            tensor<string, []> var_493_equation_0 = const()[name = tensor<string, []>("op_493_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_493_cast_fp16 = einsum(equation = var_493_equation_0, values = (var_395_cast_fp16, var_332_cast_fp16))[name = tensor<string, []>("op_493_cast_fp16")];
+            tensor<fp16, []> var_494_to_fp16 = const()[name = tensor<string, []>("op_494_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_35_cast_fp16 = mul(x = var_493_cast_fp16, y = var_494_to_fp16)[name = tensor<string, []>("aw_chunk_35_cast_fp16")];
+            tensor<string, []> var_497_equation_0 = const()[name = tensor<string, []>("op_497_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_497_cast_fp16 = einsum(equation = var_497_equation_0, values = (var_395_cast_fp16, var_339_cast_fp16))[name = tensor<string, []>("op_497_cast_fp16")];
+            tensor<fp16, []> var_498_to_fp16 = const()[name = tensor<string, []>("op_498_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_37_cast_fp16 = mul(x = var_497_cast_fp16, y = var_498_to_fp16)[name = tensor<string, []>("aw_chunk_37_cast_fp16")];
+            tensor<string, []> var_501_equation_0 = const()[name = tensor<string, []>("op_501_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_501_cast_fp16 = einsum(equation = var_501_equation_0, values = (var_395_cast_fp16, var_346_cast_fp16))[name = tensor<string, []>("op_501_cast_fp16")];
+            tensor<fp16, []> var_502_to_fp16 = const()[name = tensor<string, []>("op_502_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_39_cast_fp16 = mul(x = var_501_cast_fp16, y = var_502_to_fp16)[name = tensor<string, []>("aw_chunk_39_cast_fp16")];
+            tensor<string, []> var_505_equation_0 = const()[name = tensor<string, []>("op_505_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_505_cast_fp16 = einsum(equation = var_505_equation_0, values = (var_399_cast_fp16, var_353_cast_fp16))[name = tensor<string, []>("op_505_cast_fp16")];
+            tensor<fp16, []> var_506_to_fp16 = const()[name = tensor<string, []>("op_506_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_41_cast_fp16 = mul(x = var_505_cast_fp16, y = var_506_to_fp16)[name = tensor<string, []>("aw_chunk_41_cast_fp16")];
+            tensor<string, []> var_509_equation_0 = const()[name = tensor<string, []>("op_509_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_509_cast_fp16 = einsum(equation = var_509_equation_0, values = (var_399_cast_fp16, var_360_cast_fp16))[name = tensor<string, []>("op_509_cast_fp16")];
+            tensor<fp16, []> var_510_to_fp16 = const()[name = tensor<string, []>("op_510_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_43_cast_fp16 = mul(x = var_509_cast_fp16, y = var_510_to_fp16)[name = tensor<string, []>("aw_chunk_43_cast_fp16")];
+            tensor<string, []> var_513_equation_0 = const()[name = tensor<string, []>("op_513_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_513_cast_fp16 = einsum(equation = var_513_equation_0, values = (var_399_cast_fp16, var_367_cast_fp16))[name = tensor<string, []>("op_513_cast_fp16")];
+            tensor<fp16, []> var_514_to_fp16 = const()[name = tensor<string, []>("op_514_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_45_cast_fp16 = mul(x = var_513_cast_fp16, y = var_514_to_fp16)[name = tensor<string, []>("aw_chunk_45_cast_fp16")];
+            tensor<string, []> var_517_equation_0 = const()[name = tensor<string, []>("op_517_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_517_cast_fp16 = einsum(equation = var_517_equation_0, values = (var_399_cast_fp16, var_374_cast_fp16))[name = tensor<string, []>("op_517_cast_fp16")];
+            tensor<fp16, []> var_518_to_fp16 = const()[name = tensor<string, []>("op_518_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_47_cast_fp16 = mul(x = var_517_cast_fp16, y = var_518_to_fp16)[name = tensor<string, []>("aw_chunk_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_520_cast_fp16 = softmax(axis = var_129, x = aw_chunk_1_cast_fp16)[name = tensor<string, []>("op_520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_521_cast_fp16 = softmax(axis = var_129, x = aw_chunk_3_cast_fp16)[name = tensor<string, []>("op_521_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_522_cast_fp16 = softmax(axis = var_129, x = aw_chunk_5_cast_fp16)[name = tensor<string, []>("op_522_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_523_cast_fp16 = softmax(axis = var_129, x = aw_chunk_7_cast_fp16)[name = tensor<string, []>("op_523_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_524_cast_fp16 = softmax(axis = var_129, x = aw_chunk_9_cast_fp16)[name = tensor<string, []>("op_524_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_525_cast_fp16 = softmax(axis = var_129, x = aw_chunk_11_cast_fp16)[name = tensor<string, []>("op_525_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_526_cast_fp16 = softmax(axis = var_129, x = aw_chunk_13_cast_fp16)[name = tensor<string, []>("op_526_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_527_cast_fp16 = softmax(axis = var_129, x = aw_chunk_15_cast_fp16)[name = tensor<string, []>("op_527_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_528_cast_fp16 = softmax(axis = var_129, x = aw_chunk_17_cast_fp16)[name = tensor<string, []>("op_528_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_529_cast_fp16 = softmax(axis = var_129, x = aw_chunk_19_cast_fp16)[name = tensor<string, []>("op_529_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_530_cast_fp16 = softmax(axis = var_129, x = aw_chunk_21_cast_fp16)[name = tensor<string, []>("op_530_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_531_cast_fp16 = softmax(axis = var_129, x = aw_chunk_23_cast_fp16)[name = tensor<string, []>("op_531_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_532_cast_fp16 = softmax(axis = var_129, x = aw_chunk_25_cast_fp16)[name = tensor<string, []>("op_532_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_533_cast_fp16 = softmax(axis = var_129, x = aw_chunk_27_cast_fp16)[name = tensor<string, []>("op_533_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_534_cast_fp16 = softmax(axis = var_129, x = aw_chunk_29_cast_fp16)[name = tensor<string, []>("op_534_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_535_cast_fp16 = softmax(axis = var_129, x = aw_chunk_31_cast_fp16)[name = tensor<string, []>("op_535_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_536_cast_fp16 = softmax(axis = var_129, x = aw_chunk_33_cast_fp16)[name = tensor<string, []>("op_536_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_537_cast_fp16 = softmax(axis = var_129, x = aw_chunk_35_cast_fp16)[name = tensor<string, []>("op_537_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_538_cast_fp16 = softmax(axis = var_129, x = aw_chunk_37_cast_fp16)[name = tensor<string, []>("op_538_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_539_cast_fp16 = softmax(axis = var_129, x = aw_chunk_39_cast_fp16)[name = tensor<string, []>("op_539_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_540_cast_fp16 = softmax(axis = var_129, x = aw_chunk_41_cast_fp16)[name = tensor<string, []>("op_540_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_541_cast_fp16 = softmax(axis = var_129, x = aw_chunk_43_cast_fp16)[name = tensor<string, []>("op_541_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_542_cast_fp16 = softmax(axis = var_129, x = aw_chunk_45_cast_fp16)[name = tensor<string, []>("op_542_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_543_cast_fp16 = softmax(axis = var_129, x = aw_chunk_47_cast_fp16)[name = tensor<string, []>("op_543_cast_fp16")];
+            tensor<string, []> var_545_equation_0 = const()[name = tensor<string, []>("op_545_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_545_cast_fp16 = einsum(equation = var_545_equation_0, values = (var_401_cast_fp16, var_520_cast_fp16))[name = tensor<string, []>("op_545_cast_fp16")];
+            tensor<string, []> var_547_equation_0 = const()[name = tensor<string, []>("op_547_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_401_cast_fp16, var_521_cast_fp16))[name = tensor<string, []>("op_547_cast_fp16")];
+            tensor<string, []> var_549_equation_0 = const()[name = tensor<string, []>("op_549_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_549_cast_fp16 = einsum(equation = var_549_equation_0, values = (var_401_cast_fp16, var_522_cast_fp16))[name = tensor<string, []>("op_549_cast_fp16")];
+            tensor<string, []> var_551_equation_0 = const()[name = tensor<string, []>("op_551_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_401_cast_fp16, var_523_cast_fp16))[name = tensor<string, []>("op_551_cast_fp16")];
+            tensor<string, []> var_553_equation_0 = const()[name = tensor<string, []>("op_553_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_553_cast_fp16 = einsum(equation = var_553_equation_0, values = (var_405_cast_fp16, var_524_cast_fp16))[name = tensor<string, []>("op_553_cast_fp16")];
+            tensor<string, []> var_555_equation_0 = const()[name = tensor<string, []>("op_555_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_405_cast_fp16, var_525_cast_fp16))[name = tensor<string, []>("op_555_cast_fp16")];
+            tensor<string, []> var_557_equation_0 = const()[name = tensor<string, []>("op_557_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_557_cast_fp16 = einsum(equation = var_557_equation_0, values = (var_405_cast_fp16, var_526_cast_fp16))[name = tensor<string, []>("op_557_cast_fp16")];
+            tensor<string, []> var_559_equation_0 = const()[name = tensor<string, []>("op_559_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_405_cast_fp16, var_527_cast_fp16))[name = tensor<string, []>("op_559_cast_fp16")];
+            tensor<string, []> var_561_equation_0 = const()[name = tensor<string, []>("op_561_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_561_cast_fp16 = einsum(equation = var_561_equation_0, values = (var_409_cast_fp16, var_528_cast_fp16))[name = tensor<string, []>("op_561_cast_fp16")];
+            tensor<string, []> var_563_equation_0 = const()[name = tensor<string, []>("op_563_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_409_cast_fp16, var_529_cast_fp16))[name = tensor<string, []>("op_563_cast_fp16")];
+            tensor<string, []> var_565_equation_0 = const()[name = tensor<string, []>("op_565_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_565_cast_fp16 = einsum(equation = var_565_equation_0, values = (var_409_cast_fp16, var_530_cast_fp16))[name = tensor<string, []>("op_565_cast_fp16")];
+            tensor<string, []> var_567_equation_0 = const()[name = tensor<string, []>("op_567_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_409_cast_fp16, var_531_cast_fp16))[name = tensor<string, []>("op_567_cast_fp16")];
+            tensor<string, []> var_569_equation_0 = const()[name = tensor<string, []>("op_569_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_569_cast_fp16 = einsum(equation = var_569_equation_0, values = (var_413_cast_fp16, var_532_cast_fp16))[name = tensor<string, []>("op_569_cast_fp16")];
+            tensor<string, []> var_571_equation_0 = const()[name = tensor<string, []>("op_571_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_413_cast_fp16, var_533_cast_fp16))[name = tensor<string, []>("op_571_cast_fp16")];
+            tensor<string, []> var_573_equation_0 = const()[name = tensor<string, []>("op_573_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_573_cast_fp16 = einsum(equation = var_573_equation_0, values = (var_413_cast_fp16, var_534_cast_fp16))[name = tensor<string, []>("op_573_cast_fp16")];
+            tensor<string, []> var_575_equation_0 = const()[name = tensor<string, []>("op_575_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_575_cast_fp16 = einsum(equation = var_575_equation_0, values = (var_413_cast_fp16, var_535_cast_fp16))[name = tensor<string, []>("op_575_cast_fp16")];
+            tensor<string, []> var_577_equation_0 = const()[name = tensor<string, []>("op_577_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_577_cast_fp16 = einsum(equation = var_577_equation_0, values = (var_417_cast_fp16, var_536_cast_fp16))[name = tensor<string, []>("op_577_cast_fp16")];
+            tensor<string, []> var_579_equation_0 = const()[name = tensor<string, []>("op_579_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_579_cast_fp16 = einsum(equation = var_579_equation_0, values = (var_417_cast_fp16, var_537_cast_fp16))[name = tensor<string, []>("op_579_cast_fp16")];
+            tensor<string, []> var_581_equation_0 = const()[name = tensor<string, []>("op_581_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_581_cast_fp16 = einsum(equation = var_581_equation_0, values = (var_417_cast_fp16, var_538_cast_fp16))[name = tensor<string, []>("op_581_cast_fp16")];
+            tensor<string, []> var_583_equation_0 = const()[name = tensor<string, []>("op_583_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_583_cast_fp16 = einsum(equation = var_583_equation_0, values = (var_417_cast_fp16, var_539_cast_fp16))[name = tensor<string, []>("op_583_cast_fp16")];
+            tensor<string, []> var_585_equation_0 = const()[name = tensor<string, []>("op_585_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_585_cast_fp16 = einsum(equation = var_585_equation_0, values = (var_421_cast_fp16, var_540_cast_fp16))[name = tensor<string, []>("op_585_cast_fp16")];
+            tensor<string, []> var_587_equation_0 = const()[name = tensor<string, []>("op_587_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_587_cast_fp16 = einsum(equation = var_587_equation_0, values = (var_421_cast_fp16, var_541_cast_fp16))[name = tensor<string, []>("op_587_cast_fp16")];
+            tensor<string, []> var_589_equation_0 = const()[name = tensor<string, []>("op_589_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_589_cast_fp16 = einsum(equation = var_589_equation_0, values = (var_421_cast_fp16, var_542_cast_fp16))[name = tensor<string, []>("op_589_cast_fp16")];
+            tensor<string, []> var_591_equation_0 = const()[name = tensor<string, []>("op_591_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_591_cast_fp16 = einsum(equation = var_591_equation_0, values = (var_421_cast_fp16, var_543_cast_fp16))[name = tensor<string, []>("op_591_cast_fp16")];
+            tensor<bool, []> var_593_interleave_0 = const()[name = tensor<string, []>("op_593_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_593_cast_fp16 = concat(axis = var_118, interleave = var_593_interleave_0, values = (var_545_cast_fp16, var_547_cast_fp16, var_549_cast_fp16, var_551_cast_fp16))[name = tensor<string, []>("op_593_cast_fp16")];
+            tensor<bool, []> var_595_interleave_0 = const()[name = tensor<string, []>("op_595_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_595_cast_fp16 = concat(axis = var_118, interleave = var_595_interleave_0, values = (var_553_cast_fp16, var_555_cast_fp16, var_557_cast_fp16, var_559_cast_fp16))[name = tensor<string, []>("op_595_cast_fp16")];
+            tensor<bool, []> var_597_interleave_0 = const()[name = tensor<string, []>("op_597_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_597_cast_fp16 = concat(axis = var_118, interleave = var_597_interleave_0, values = (var_561_cast_fp16, var_563_cast_fp16, var_565_cast_fp16, var_567_cast_fp16))[name = tensor<string, []>("op_597_cast_fp16")];
+            tensor<bool, []> var_599_interleave_0 = const()[name = tensor<string, []>("op_599_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_599_cast_fp16 = concat(axis = var_118, interleave = var_599_interleave_0, values = (var_569_cast_fp16, var_571_cast_fp16, var_573_cast_fp16, var_575_cast_fp16))[name = tensor<string, []>("op_599_cast_fp16")];
+            tensor<bool, []> var_601_interleave_0 = const()[name = tensor<string, []>("op_601_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_601_cast_fp16 = concat(axis = var_118, interleave = var_601_interleave_0, values = (var_577_cast_fp16, var_579_cast_fp16, var_581_cast_fp16, var_583_cast_fp16))[name = tensor<string, []>("op_601_cast_fp16")];
+            tensor<bool, []> var_603_interleave_0 = const()[name = tensor<string, []>("op_603_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_603_cast_fp16 = concat(axis = var_118, interleave = var_603_interleave_0, values = (var_585_cast_fp16, var_587_cast_fp16, var_589_cast_fp16, var_591_cast_fp16))[name = tensor<string, []>("op_603_cast_fp16")];
+            tensor<bool, []> input_1_interleave_0 = const()[name = tensor<string, []>("input_1_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_1_cast_fp16 = concat(axis = var_129, interleave = input_1_interleave_0, values = (var_593_cast_fp16, var_595_cast_fp16, var_597_cast_fp16, var_599_cast_fp16, var_601_cast_fp16, var_603_cast_fp16))[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<int32, [2]> var_608 = const()[name = tensor<string, []>("op_608"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_610 = const()[name = tensor<string, []>("op_610"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_3_pad_type_0 = const()[name = tensor<string, []>("obj_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_3_pad_0 = const()[name = tensor<string, []>("obj_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3112896)))];
+            tensor<fp16, [384]> layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3407872)))];
+            tensor<fp16, [1, 384, 1, 1500]> obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_610, groups = var_129, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = var_608, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("obj_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> var_616 = const()[name = tensor<string, []>("op_616"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_3_cast_fp16 = reduce_mean(axes = var_616, keep_dims = var_130, x = inputs_3_cast_fp16)[name = tensor<string, []>("channels_mean_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor<string, []>("zero_mean_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor<string, []>("zero_mean_sq_3_cast_fp16")];
+            tensor<int32, [1]> var_620 = const()[name = tensor<string, []>("op_620"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_621_cast_fp16 = reduce_mean(axes = var_620, keep_dims = var_130, x = zero_mean_sq_3_cast_fp16)[name = tensor<string, []>("op_621_cast_fp16")];
+            tensor<fp16, []> var_622_to_fp16 = const()[name = tensor<string, []>("op_622_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_623_cast_fp16 = add(x = var_621_cast_fp16, y = var_622_to_fp16)[name = tensor<string, []>("op_623_cast_fp16")];
+            tensor<fp16, []> denom_3_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_3_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_623_cast_fp16)[name = tensor<string, []>("denom_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
+            tensor<fp16, [384]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3408704)))];
+            tensor<fp16, [384]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3409536)))];
+            tensor<fp16, []> input_3_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_3_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<int32, [2]> var_634 = const()[name = tensor<string, []>("op_634"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_636 = const()[name = tensor<string, []>("op_636"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_5_pad_type_0 = const()[name = tensor<string, []>("input_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_5_pad_0 = const()[name = tensor<string, []>("input_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1536, 384, 1, 1]> layers_0_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3410368)))];
+            tensor<fp16, [1536]> layers_0_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4590080)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_636, groups = var_129, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = var_634, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> input_7_mode_0 = const()[name = tensor<string, []>("input_7_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<int32, [2]> var_642 = const()[name = tensor<string, []>("op_642"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_644 = const()[name = tensor<string, []>("op_644"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 1536, 1, 1]> layers_0_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4593216)))];
+            tensor<fp16, [384]> layers_0_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5772928)))];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_644, groups = var_129, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_642, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_651 = const()[name = tensor<string, []>("op_651"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_662 = const()[name = tensor<string, []>("op_662"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_663 = const()[name = tensor<string, []>("op_663"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_673 = const()[name = tensor<string, []>("op_673"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_5_cast_fp16 = reduce_mean(axes = var_673, keep_dims = var_663, x = inputs_5_cast_fp16)[name = tensor<string, []>("channels_mean_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = channels_mean_5_cast_fp16)[name = tensor<string, []>("zero_mean_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor<string, []>("zero_mean_sq_5_cast_fp16")];
+            tensor<int32, [1]> var_677 = const()[name = tensor<string, []>("op_677"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_678_cast_fp16 = reduce_mean(axes = var_677, keep_dims = var_663, x = zero_mean_sq_5_cast_fp16)[name = tensor<string, []>("op_678_cast_fp16")];
+            tensor<fp16, []> var_679_to_fp16 = const()[name = tensor<string, []>("op_679_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_680_cast_fp16 = add(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor<string, []>("op_680_cast_fp16")];
+            tensor<fp16, []> denom_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_680_cast_fp16)[name = tensor<string, []>("denom_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
+            tensor<fp16, [384]> obj_5_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5773760)))];
+            tensor<fp16, [384]> obj_5_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_5_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5774592)))];
+            tensor<fp16, []> obj_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor<string, []>("obj_5_cast_fp16")];
+            tensor<int32, [2]> var_695 = const()[name = tensor<string, []>("op_695"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_697 = const()[name = tensor<string, []>("op_697"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_3_pad_type_0 = const()[name = tensor<string, []>("query_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_3_pad_0 = const()[name = tensor<string, []>("query_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5775424)))];
+            tensor<fp16, [384]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6070400)))];
+            tensor<fp16, [1, 384, 1, 1500]> query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_697, groups = var_662, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_695, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
+            tensor<int32, [2]> var_701 = const()[name = tensor<string, []>("op_701"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_703 = const()[name = tensor<string, []>("op_703"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_3_pad_type_0 = const()[name = tensor<string, []>("key_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_3_pad_0 = const()[name = tensor<string, []>("key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6071232)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_3_cast_fp16 = conv(dilations = var_703, groups = var_662, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_701, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("key_3_cast_fp16")];
+            tensor<int32, [2]> var_708 = const()[name = tensor<string, []>("op_708"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_710 = const()[name = tensor<string, []>("op_710"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_3_pad_type_0 = const()[name = tensor<string, []>("value_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_3_pad_0 = const()[name = tensor<string, []>("value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6366208)))];
+            tensor<fp16, [384]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6661184)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_710, groups = var_662, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_708, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor<string, []>("value_3_cast_fp16")];
+            tensor<int32, [4]> var_717_begin_0 = const()[name = tensor<string, []>("op_717_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_717_end_0 = const()[name = tensor<string, []>("op_717_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_717_end_mask_0 = const()[name = tensor<string, []>("op_717_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_717_cast_fp16 = slice_by_index(begin = var_717_begin_0, end = var_717_end_0, end_mask = var_717_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_717_cast_fp16")];
+            tensor<int32, [4]> var_721_begin_0 = const()[name = tensor<string, []>("op_721_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_721_end_0 = const()[name = tensor<string, []>("op_721_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_721_end_mask_0 = const()[name = tensor<string, []>("op_721_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_721_cast_fp16 = slice_by_index(begin = var_721_begin_0, end = var_721_end_0, end_mask = var_721_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_721_cast_fp16")];
+            tensor<int32, [4]> var_725_begin_0 = const()[name = tensor<string, []>("op_725_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_725_end_0 = const()[name = tensor<string, []>("op_725_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_725_end_mask_0 = const()[name = tensor<string, []>("op_725_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_725_cast_fp16 = slice_by_index(begin = var_725_begin_0, end = var_725_end_0, end_mask = var_725_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_725_cast_fp16")];
+            tensor<int32, [4]> var_729_begin_0 = const()[name = tensor<string, []>("op_729_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_729_end_0 = const()[name = tensor<string, []>("op_729_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_729_end_mask_0 = const()[name = tensor<string, []>("op_729_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_729_cast_fp16 = slice_by_index(begin = var_729_begin_0, end = var_729_end_0, end_mask = var_729_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_729_cast_fp16")];
+            tensor<int32, [4]> var_733_begin_0 = const()[name = tensor<string, []>("op_733_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_733_end_0 = const()[name = tensor<string, []>("op_733_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_733_end_mask_0 = const()[name = tensor<string, []>("op_733_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_733_cast_fp16 = slice_by_index(begin = var_733_begin_0, end = var_733_end_0, end_mask = var_733_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_733_cast_fp16")];
+            tensor<int32, [4]> var_737_begin_0 = const()[name = tensor<string, []>("op_737_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_737_end_0 = const()[name = tensor<string, []>("op_737_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_737_end_mask_0 = const()[name = tensor<string, []>("op_737_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_737_cast_fp16 = slice_by_index(begin = var_737_begin_0, end = var_737_end_0, end_mask = var_737_end_mask_0, x = query_3_cast_fp16)[name = tensor<string, []>("op_737_cast_fp16")];
+            tensor<int32, [4]> var_746_begin_0 = const()[name = tensor<string, []>("op_746_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_746_end_0 = const()[name = tensor<string, []>("op_746_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_746_end_mask_0 = const()[name = tensor<string, []>("op_746_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = var_717_cast_fp16)[name = tensor<string, []>("op_746_cast_fp16")];
+            tensor<int32, [4]> var_753_begin_0 = const()[name = tensor<string, []>("op_753_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_753_end_0 = const()[name = tensor<string, []>("op_753_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_753_end_mask_0 = const()[name = tensor<string, []>("op_753_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_753_cast_fp16 = slice_by_index(begin = var_753_begin_0, end = var_753_end_0, end_mask = var_753_end_mask_0, x = var_717_cast_fp16)[name = tensor<string, []>("op_753_cast_fp16")];
+            tensor<int32, [4]> var_760_begin_0 = const()[name = tensor<string, []>("op_760_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_760_end_0 = const()[name = tensor<string, []>("op_760_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_760_end_mask_0 = const()[name = tensor<string, []>("op_760_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_760_cast_fp16 = slice_by_index(begin = var_760_begin_0, end = var_760_end_0, end_mask = var_760_end_mask_0, x = var_717_cast_fp16)[name = tensor<string, []>("op_760_cast_fp16")];
+            tensor<int32, [4]> var_767_begin_0 = const()[name = tensor<string, []>("op_767_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_767_end_0 = const()[name = tensor<string, []>("op_767_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_767_end_mask_0 = const()[name = tensor<string, []>("op_767_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_767_cast_fp16 = slice_by_index(begin = var_767_begin_0, end = var_767_end_0, end_mask = var_767_end_mask_0, x = var_717_cast_fp16)[name = tensor<string, []>("op_767_cast_fp16")];
+            tensor<int32, [4]> var_774_begin_0 = const()[name = tensor<string, []>("op_774_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_774_end_0 = const()[name = tensor<string, []>("op_774_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_774_end_mask_0 = const()[name = tensor<string, []>("op_774_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_774_cast_fp16 = slice_by_index(begin = var_774_begin_0, end = var_774_end_0, end_mask = var_774_end_mask_0, x = var_721_cast_fp16)[name = tensor<string, []>("op_774_cast_fp16")];
+            tensor<int32, [4]> var_781_begin_0 = const()[name = tensor<string, []>("op_781_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_781_end_0 = const()[name = tensor<string, []>("op_781_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_781_end_mask_0 = const()[name = tensor<string, []>("op_781_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_781_cast_fp16 = slice_by_index(begin = var_781_begin_0, end = var_781_end_0, end_mask = var_781_end_mask_0, x = var_721_cast_fp16)[name = tensor<string, []>("op_781_cast_fp16")];
+            tensor<int32, [4]> var_788_begin_0 = const()[name = tensor<string, []>("op_788_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_788_end_0 = const()[name = tensor<string, []>("op_788_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_788_end_mask_0 = const()[name = tensor<string, []>("op_788_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_788_cast_fp16 = slice_by_index(begin = var_788_begin_0, end = var_788_end_0, end_mask = var_788_end_mask_0, x = var_721_cast_fp16)[name = tensor<string, []>("op_788_cast_fp16")];
+            tensor<int32, [4]> var_795_begin_0 = const()[name = tensor<string, []>("op_795_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_795_end_0 = const()[name = tensor<string, []>("op_795_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_795_end_mask_0 = const()[name = tensor<string, []>("op_795_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_795_cast_fp16 = slice_by_index(begin = var_795_begin_0, end = var_795_end_0, end_mask = var_795_end_mask_0, x = var_721_cast_fp16)[name = tensor<string, []>("op_795_cast_fp16")];
+            tensor<int32, [4]> var_802_begin_0 = const()[name = tensor<string, []>("op_802_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_802_end_0 = const()[name = tensor<string, []>("op_802_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_802_end_mask_0 = const()[name = tensor<string, []>("op_802_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_802_cast_fp16 = slice_by_index(begin = var_802_begin_0, end = var_802_end_0, end_mask = var_802_end_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_802_cast_fp16")];
+            tensor<int32, [4]> var_809_begin_0 = const()[name = tensor<string, []>("op_809_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_809_end_0 = const()[name = tensor<string, []>("op_809_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_809_end_mask_0 = const()[name = tensor<string, []>("op_809_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_809_cast_fp16 = slice_by_index(begin = var_809_begin_0, end = var_809_end_0, end_mask = var_809_end_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_809_cast_fp16")];
+            tensor<int32, [4]> var_816_begin_0 = const()[name = tensor<string, []>("op_816_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_816_end_0 = const()[name = tensor<string, []>("op_816_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_816_end_mask_0 = const()[name = tensor<string, []>("op_816_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_816_cast_fp16 = slice_by_index(begin = var_816_begin_0, end = var_816_end_0, end_mask = var_816_end_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
+            tensor<int32, [4]> var_823_begin_0 = const()[name = tensor<string, []>("op_823_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_823_end_0 = const()[name = tensor<string, []>("op_823_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_823_end_mask_0 = const()[name = tensor<string, []>("op_823_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_823_cast_fp16 = slice_by_index(begin = var_823_begin_0, end = var_823_end_0, end_mask = var_823_end_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_823_cast_fp16")];
+            tensor<int32, [4]> var_830_begin_0 = const()[name = tensor<string, []>("op_830_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_830_end_0 = const()[name = tensor<string, []>("op_830_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_830_end_mask_0 = const()[name = tensor<string, []>("op_830_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_830_cast_fp16 = slice_by_index(begin = var_830_begin_0, end = var_830_end_0, end_mask = var_830_end_mask_0, x = var_729_cast_fp16)[name = tensor<string, []>("op_830_cast_fp16")];
+            tensor<int32, [4]> var_837_begin_0 = const()[name = tensor<string, []>("op_837_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_837_end_0 = const()[name = tensor<string, []>("op_837_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_837_end_mask_0 = const()[name = tensor<string, []>("op_837_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = var_729_cast_fp16)[name = tensor<string, []>("op_837_cast_fp16")];
+            tensor<int32, [4]> var_844_begin_0 = const()[name = tensor<string, []>("op_844_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_844_end_0 = const()[name = tensor<string, []>("op_844_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_844_end_mask_0 = const()[name = tensor<string, []>("op_844_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_844_cast_fp16 = slice_by_index(begin = var_844_begin_0, end = var_844_end_0, end_mask = var_844_end_mask_0, x = var_729_cast_fp16)[name = tensor<string, []>("op_844_cast_fp16")];
+            tensor<int32, [4]> var_851_begin_0 = const()[name = tensor<string, []>("op_851_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_851_end_0 = const()[name = tensor<string, []>("op_851_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_851_end_mask_0 = const()[name = tensor<string, []>("op_851_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_851_cast_fp16 = slice_by_index(begin = var_851_begin_0, end = var_851_end_0, end_mask = var_851_end_mask_0, x = var_729_cast_fp16)[name = tensor<string, []>("op_851_cast_fp16")];
+            tensor<int32, [4]> var_858_begin_0 = const()[name = tensor<string, []>("op_858_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_858_end_0 = const()[name = tensor<string, []>("op_858_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_858_end_mask_0 = const()[name = tensor<string, []>("op_858_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_858_cast_fp16 = slice_by_index(begin = var_858_begin_0, end = var_858_end_0, end_mask = var_858_end_mask_0, x = var_733_cast_fp16)[name = tensor<string, []>("op_858_cast_fp16")];
+            tensor<int32, [4]> var_865_begin_0 = const()[name = tensor<string, []>("op_865_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_865_end_0 = const()[name = tensor<string, []>("op_865_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_865_end_mask_0 = const()[name = tensor<string, []>("op_865_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = var_733_cast_fp16)[name = tensor<string, []>("op_865_cast_fp16")];
+            tensor<int32, [4]> var_872_begin_0 = const()[name = tensor<string, []>("op_872_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_872_end_0 = const()[name = tensor<string, []>("op_872_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_872_end_mask_0 = const()[name = tensor<string, []>("op_872_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = var_872_end_0, end_mask = var_872_end_mask_0, x = var_733_cast_fp16)[name = tensor<string, []>("op_872_cast_fp16")];
+            tensor<int32, [4]> var_879_begin_0 = const()[name = tensor<string, []>("op_879_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_879_end_0 = const()[name = tensor<string, []>("op_879_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_879_end_mask_0 = const()[name = tensor<string, []>("op_879_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_879_cast_fp16 = slice_by_index(begin = var_879_begin_0, end = var_879_end_0, end_mask = var_879_end_mask_0, x = var_733_cast_fp16)[name = tensor<string, []>("op_879_cast_fp16")];
+            tensor<int32, [4]> var_886_begin_0 = const()[name = tensor<string, []>("op_886_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_886_end_0 = const()[name = tensor<string, []>("op_886_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_886_end_mask_0 = const()[name = tensor<string, []>("op_886_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_886_cast_fp16 = slice_by_index(begin = var_886_begin_0, end = var_886_end_0, end_mask = var_886_end_mask_0, x = var_737_cast_fp16)[name = tensor<string, []>("op_886_cast_fp16")];
+            tensor<int32, [4]> var_893_begin_0 = const()[name = tensor<string, []>("op_893_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_893_end_0 = const()[name = tensor<string, []>("op_893_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_893_end_mask_0 = const()[name = tensor<string, []>("op_893_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_893_cast_fp16 = slice_by_index(begin = var_893_begin_0, end = var_893_end_0, end_mask = var_893_end_mask_0, x = var_737_cast_fp16)[name = tensor<string, []>("op_893_cast_fp16")];
+            tensor<int32, [4]> var_900_begin_0 = const()[name = tensor<string, []>("op_900_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_900_end_0 = const()[name = tensor<string, []>("op_900_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_900_end_mask_0 = const()[name = tensor<string, []>("op_900_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_900_cast_fp16 = slice_by_index(begin = var_900_begin_0, end = var_900_end_0, end_mask = var_900_end_mask_0, x = var_737_cast_fp16)[name = tensor<string, []>("op_900_cast_fp16")];
+            tensor<int32, [4]> var_907_begin_0 = const()[name = tensor<string, []>("op_907_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_907_end_0 = const()[name = tensor<string, []>("op_907_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_907_end_mask_0 = const()[name = tensor<string, []>("op_907_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_907_cast_fp16 = slice_by_index(begin = var_907_begin_0, end = var_907_end_0, end_mask = var_907_end_mask_0, x = var_737_cast_fp16)[name = tensor<string, []>("op_907_cast_fp16")];
+            tensor<int32, [4]> k_3_perm_0 = const()[name = tensor<string, []>("k_3_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_912_begin_0 = const()[name = tensor<string, []>("op_912_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_912_end_0 = const()[name = tensor<string, []>("op_912_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_912_end_mask_0 = const()[name = tensor<string, []>("op_912_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 384]> transpose_2 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_912_cast_fp16 = slice_by_index(begin = var_912_begin_0, end = var_912_end_0, end_mask = var_912_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_912_cast_fp16")];
+            tensor<int32, [4]> var_916_begin_0 = const()[name = tensor<string, []>("op_916_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_916_end_0 = const()[name = tensor<string, []>("op_916_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_916_end_mask_0 = const()[name = tensor<string, []>("op_916_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_916_cast_fp16 = slice_by_index(begin = var_916_begin_0, end = var_916_end_0, end_mask = var_916_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_916_cast_fp16")];
+            tensor<int32, [4]> var_920_begin_0 = const()[name = tensor<string, []>("op_920_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_920_end_0 = const()[name = tensor<string, []>("op_920_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_920_end_mask_0 = const()[name = tensor<string, []>("op_920_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_920_cast_fp16")];
+            tensor<int32, [4]> var_924_begin_0 = const()[name = tensor<string, []>("op_924_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_924_end_0 = const()[name = tensor<string, []>("op_924_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_924_end_mask_0 = const()[name = tensor<string, []>("op_924_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_924_cast_fp16")];
+            tensor<int32, [4]> var_928_begin_0 = const()[name = tensor<string, []>("op_928_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_928_end_0 = const()[name = tensor<string, []>("op_928_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_928_end_mask_0 = const()[name = tensor<string, []>("op_928_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_928_cast_fp16")];
+            tensor<int32, [4]> var_932_begin_0 = const()[name = tensor<string, []>("op_932_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_932_end_0 = const()[name = tensor<string, []>("op_932_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_932_end_mask_0 = const()[name = tensor<string, []>("op_932_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = transpose_2)[name = tensor<string, []>("op_932_cast_fp16")];
+            tensor<int32, [4]> var_934_begin_0 = const()[name = tensor<string, []>("op_934_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_934_end_0 = const()[name = tensor<string, []>("op_934_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_934_end_mask_0 = const()[name = tensor<string, []>("op_934_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_934_cast_fp16 = slice_by_index(begin = var_934_begin_0, end = var_934_end_0, end_mask = var_934_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_934_cast_fp16")];
+            tensor<int32, [4]> var_938_begin_0 = const()[name = tensor<string, []>("op_938_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_938_end_0 = const()[name = tensor<string, []>("op_938_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_938_end_mask_0 = const()[name = tensor<string, []>("op_938_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_938_cast_fp16 = slice_by_index(begin = var_938_begin_0, end = var_938_end_0, end_mask = var_938_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_938_cast_fp16")];
+            tensor<int32, [4]> var_942_begin_0 = const()[name = tensor<string, []>("op_942_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_942_end_0 = const()[name = tensor<string, []>("op_942_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_942_end_mask_0 = const()[name = tensor<string, []>("op_942_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_942_cast_fp16 = slice_by_index(begin = var_942_begin_0, end = var_942_end_0, end_mask = var_942_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_942_cast_fp16")];
+            tensor<int32, [4]> var_946_begin_0 = const()[name = tensor<string, []>("op_946_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_946_end_0 = const()[name = tensor<string, []>("op_946_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_946_end_mask_0 = const()[name = tensor<string, []>("op_946_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_946_cast_fp16 = slice_by_index(begin = var_946_begin_0, end = var_946_end_0, end_mask = var_946_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_946_cast_fp16")];
+            tensor<int32, [4]> var_950_begin_0 = const()[name = tensor<string, []>("op_950_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_950_end_0 = const()[name = tensor<string, []>("op_950_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_950_end_mask_0 = const()[name = tensor<string, []>("op_950_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_950_cast_fp16 = slice_by_index(begin = var_950_begin_0, end = var_950_end_0, end_mask = var_950_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_950_cast_fp16")];
+            tensor<int32, [4]> var_954_begin_0 = const()[name = tensor<string, []>("op_954_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_954_end_0 = const()[name = tensor<string, []>("op_954_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_954_end_mask_0 = const()[name = tensor<string, []>("op_954_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_954_cast_fp16 = slice_by_index(begin = var_954_begin_0, end = var_954_end_0, end_mask = var_954_end_mask_0, x = value_3_cast_fp16)[name = tensor<string, []>("op_954_cast_fp16")];
+            tensor<string, []> var_958_equation_0 = const()[name = tensor<string, []>("op_958_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_958_cast_fp16 = einsum(equation = var_958_equation_0, values = (var_912_cast_fp16, var_746_cast_fp16))[name = tensor<string, []>("op_958_cast_fp16")];
+            tensor<fp16, []> var_959_to_fp16 = const()[name = tensor<string, []>("op_959_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_49_cast_fp16 = mul(x = var_958_cast_fp16, y = var_959_to_fp16)[name = tensor<string, []>("aw_chunk_49_cast_fp16")];
+            tensor<string, []> var_962_equation_0 = const()[name = tensor<string, []>("op_962_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_962_cast_fp16 = einsum(equation = var_962_equation_0, values = (var_912_cast_fp16, var_753_cast_fp16))[name = tensor<string, []>("op_962_cast_fp16")];
+            tensor<fp16, []> var_963_to_fp16 = const()[name = tensor<string, []>("op_963_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_51_cast_fp16 = mul(x = var_962_cast_fp16, y = var_963_to_fp16)[name = tensor<string, []>("aw_chunk_51_cast_fp16")];
+            tensor<string, []> var_966_equation_0 = const()[name = tensor<string, []>("op_966_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_966_cast_fp16 = einsum(equation = var_966_equation_0, values = (var_912_cast_fp16, var_760_cast_fp16))[name = tensor<string, []>("op_966_cast_fp16")];
+            tensor<fp16, []> var_967_to_fp16 = const()[name = tensor<string, []>("op_967_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_53_cast_fp16 = mul(x = var_966_cast_fp16, y = var_967_to_fp16)[name = tensor<string, []>("aw_chunk_53_cast_fp16")];
+            tensor<string, []> var_970_equation_0 = const()[name = tensor<string, []>("op_970_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_970_cast_fp16 = einsum(equation = var_970_equation_0, values = (var_912_cast_fp16, var_767_cast_fp16))[name = tensor<string, []>("op_970_cast_fp16")];
+            tensor<fp16, []> var_971_to_fp16 = const()[name = tensor<string, []>("op_971_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_55_cast_fp16 = mul(x = var_970_cast_fp16, y = var_971_to_fp16)[name = tensor<string, []>("aw_chunk_55_cast_fp16")];
+            tensor<string, []> var_974_equation_0 = const()[name = tensor<string, []>("op_974_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_974_cast_fp16 = einsum(equation = var_974_equation_0, values = (var_916_cast_fp16, var_774_cast_fp16))[name = tensor<string, []>("op_974_cast_fp16")];
+            tensor<fp16, []> var_975_to_fp16 = const()[name = tensor<string, []>("op_975_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_57_cast_fp16 = mul(x = var_974_cast_fp16, y = var_975_to_fp16)[name = tensor<string, []>("aw_chunk_57_cast_fp16")];
+            tensor<string, []> var_978_equation_0 = const()[name = tensor<string, []>("op_978_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_978_cast_fp16 = einsum(equation = var_978_equation_0, values = (var_916_cast_fp16, var_781_cast_fp16))[name = tensor<string, []>("op_978_cast_fp16")];
+            tensor<fp16, []> var_979_to_fp16 = const()[name = tensor<string, []>("op_979_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_59_cast_fp16 = mul(x = var_978_cast_fp16, y = var_979_to_fp16)[name = tensor<string, []>("aw_chunk_59_cast_fp16")];
+            tensor<string, []> var_982_equation_0 = const()[name = tensor<string, []>("op_982_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_982_cast_fp16 = einsum(equation = var_982_equation_0, values = (var_916_cast_fp16, var_788_cast_fp16))[name = tensor<string, []>("op_982_cast_fp16")];
+            tensor<fp16, []> var_983_to_fp16 = const()[name = tensor<string, []>("op_983_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_61_cast_fp16 = mul(x = var_982_cast_fp16, y = var_983_to_fp16)[name = tensor<string, []>("aw_chunk_61_cast_fp16")];
+            tensor<string, []> var_986_equation_0 = const()[name = tensor<string, []>("op_986_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_986_cast_fp16 = einsum(equation = var_986_equation_0, values = (var_916_cast_fp16, var_795_cast_fp16))[name = tensor<string, []>("op_986_cast_fp16")];
+            tensor<fp16, []> var_987_to_fp16 = const()[name = tensor<string, []>("op_987_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_63_cast_fp16 = mul(x = var_986_cast_fp16, y = var_987_to_fp16)[name = tensor<string, []>("aw_chunk_63_cast_fp16")];
+            tensor<string, []> var_990_equation_0 = const()[name = tensor<string, []>("op_990_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_990_cast_fp16 = einsum(equation = var_990_equation_0, values = (var_920_cast_fp16, var_802_cast_fp16))[name = tensor<string, []>("op_990_cast_fp16")];
+            tensor<fp16, []> var_991_to_fp16 = const()[name = tensor<string, []>("op_991_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_65_cast_fp16 = mul(x = var_990_cast_fp16, y = var_991_to_fp16)[name = tensor<string, []>("aw_chunk_65_cast_fp16")];
+            tensor<string, []> var_994_equation_0 = const()[name = tensor<string, []>("op_994_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_994_cast_fp16 = einsum(equation = var_994_equation_0, values = (var_920_cast_fp16, var_809_cast_fp16))[name = tensor<string, []>("op_994_cast_fp16")];
+            tensor<fp16, []> var_995_to_fp16 = const()[name = tensor<string, []>("op_995_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_67_cast_fp16 = mul(x = var_994_cast_fp16, y = var_995_to_fp16)[name = tensor<string, []>("aw_chunk_67_cast_fp16")];
+            tensor<string, []> var_998_equation_0 = const()[name = tensor<string, []>("op_998_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_998_cast_fp16 = einsum(equation = var_998_equation_0, values = (var_920_cast_fp16, var_816_cast_fp16))[name = tensor<string, []>("op_998_cast_fp16")];
+            tensor<fp16, []> var_999_to_fp16 = const()[name = tensor<string, []>("op_999_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_69_cast_fp16 = mul(x = var_998_cast_fp16, y = var_999_to_fp16)[name = tensor<string, []>("aw_chunk_69_cast_fp16")];
+            tensor<string, []> var_1002_equation_0 = const()[name = tensor<string, []>("op_1002_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1002_cast_fp16 = einsum(equation = var_1002_equation_0, values = (var_920_cast_fp16, var_823_cast_fp16))[name = tensor<string, []>("op_1002_cast_fp16")];
+            tensor<fp16, []> var_1003_to_fp16 = const()[name = tensor<string, []>("op_1003_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_71_cast_fp16 = mul(x = var_1002_cast_fp16, y = var_1003_to_fp16)[name = tensor<string, []>("aw_chunk_71_cast_fp16")];
+            tensor<string, []> var_1006_equation_0 = const()[name = tensor<string, []>("op_1006_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1006_cast_fp16 = einsum(equation = var_1006_equation_0, values = (var_924_cast_fp16, var_830_cast_fp16))[name = tensor<string, []>("op_1006_cast_fp16")];
+            tensor<fp16, []> var_1007_to_fp16 = const()[name = tensor<string, []>("op_1007_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_73_cast_fp16 = mul(x = var_1006_cast_fp16, y = var_1007_to_fp16)[name = tensor<string, []>("aw_chunk_73_cast_fp16")];
+            tensor<string, []> var_1010_equation_0 = const()[name = tensor<string, []>("op_1010_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1010_cast_fp16 = einsum(equation = var_1010_equation_0, values = (var_924_cast_fp16, var_837_cast_fp16))[name = tensor<string, []>("op_1010_cast_fp16")];
+            tensor<fp16, []> var_1011_to_fp16 = const()[name = tensor<string, []>("op_1011_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_75_cast_fp16 = mul(x = var_1010_cast_fp16, y = var_1011_to_fp16)[name = tensor<string, []>("aw_chunk_75_cast_fp16")];
+            tensor<string, []> var_1014_equation_0 = const()[name = tensor<string, []>("op_1014_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1014_cast_fp16 = einsum(equation = var_1014_equation_0, values = (var_924_cast_fp16, var_844_cast_fp16))[name = tensor<string, []>("op_1014_cast_fp16")];
+            tensor<fp16, []> var_1015_to_fp16 = const()[name = tensor<string, []>("op_1015_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_77_cast_fp16 = mul(x = var_1014_cast_fp16, y = var_1015_to_fp16)[name = tensor<string, []>("aw_chunk_77_cast_fp16")];
+            tensor<string, []> var_1018_equation_0 = const()[name = tensor<string, []>("op_1018_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1018_cast_fp16 = einsum(equation = var_1018_equation_0, values = (var_924_cast_fp16, var_851_cast_fp16))[name = tensor<string, []>("op_1018_cast_fp16")];
+            tensor<fp16, []> var_1019_to_fp16 = const()[name = tensor<string, []>("op_1019_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_79_cast_fp16 = mul(x = var_1018_cast_fp16, y = var_1019_to_fp16)[name = tensor<string, []>("aw_chunk_79_cast_fp16")];
+            tensor<string, []> var_1022_equation_0 = const()[name = tensor<string, []>("op_1022_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1022_cast_fp16 = einsum(equation = var_1022_equation_0, values = (var_928_cast_fp16, var_858_cast_fp16))[name = tensor<string, []>("op_1022_cast_fp16")];
+            tensor<fp16, []> var_1023_to_fp16 = const()[name = tensor<string, []>("op_1023_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_81_cast_fp16 = mul(x = var_1022_cast_fp16, y = var_1023_to_fp16)[name = tensor<string, []>("aw_chunk_81_cast_fp16")];
+            tensor<string, []> var_1026_equation_0 = const()[name = tensor<string, []>("op_1026_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1026_cast_fp16 = einsum(equation = var_1026_equation_0, values = (var_928_cast_fp16, var_865_cast_fp16))[name = tensor<string, []>("op_1026_cast_fp16")];
+            tensor<fp16, []> var_1027_to_fp16 = const()[name = tensor<string, []>("op_1027_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_83_cast_fp16 = mul(x = var_1026_cast_fp16, y = var_1027_to_fp16)[name = tensor<string, []>("aw_chunk_83_cast_fp16")];
+            tensor<string, []> var_1030_equation_0 = const()[name = tensor<string, []>("op_1030_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1030_cast_fp16 = einsum(equation = var_1030_equation_0, values = (var_928_cast_fp16, var_872_cast_fp16))[name = tensor<string, []>("op_1030_cast_fp16")];
+            tensor<fp16, []> var_1031_to_fp16 = const()[name = tensor<string, []>("op_1031_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_85_cast_fp16 = mul(x = var_1030_cast_fp16, y = var_1031_to_fp16)[name = tensor<string, []>("aw_chunk_85_cast_fp16")];
+            tensor<string, []> var_1034_equation_0 = const()[name = tensor<string, []>("op_1034_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1034_cast_fp16 = einsum(equation = var_1034_equation_0, values = (var_928_cast_fp16, var_879_cast_fp16))[name = tensor<string, []>("op_1034_cast_fp16")];
+            tensor<fp16, []> var_1035_to_fp16 = const()[name = tensor<string, []>("op_1035_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_87_cast_fp16 = mul(x = var_1034_cast_fp16, y = var_1035_to_fp16)[name = tensor<string, []>("aw_chunk_87_cast_fp16")];
+            tensor<string, []> var_1038_equation_0 = const()[name = tensor<string, []>("op_1038_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1038_cast_fp16 = einsum(equation = var_1038_equation_0, values = (var_932_cast_fp16, var_886_cast_fp16))[name = tensor<string, []>("op_1038_cast_fp16")];
+            tensor<fp16, []> var_1039_to_fp16 = const()[name = tensor<string, []>("op_1039_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_89_cast_fp16 = mul(x = var_1038_cast_fp16, y = var_1039_to_fp16)[name = tensor<string, []>("aw_chunk_89_cast_fp16")];
+            tensor<string, []> var_1042_equation_0 = const()[name = tensor<string, []>("op_1042_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1042_cast_fp16 = einsum(equation = var_1042_equation_0, values = (var_932_cast_fp16, var_893_cast_fp16))[name = tensor<string, []>("op_1042_cast_fp16")];
+            tensor<fp16, []> var_1043_to_fp16 = const()[name = tensor<string, []>("op_1043_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_91_cast_fp16 = mul(x = var_1042_cast_fp16, y = var_1043_to_fp16)[name = tensor<string, []>("aw_chunk_91_cast_fp16")];
+            tensor<string, []> var_1046_equation_0 = const()[name = tensor<string, []>("op_1046_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1046_cast_fp16 = einsum(equation = var_1046_equation_0, values = (var_932_cast_fp16, var_900_cast_fp16))[name = tensor<string, []>("op_1046_cast_fp16")];
+            tensor<fp16, []> var_1047_to_fp16 = const()[name = tensor<string, []>("op_1047_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_93_cast_fp16 = mul(x = var_1046_cast_fp16, y = var_1047_to_fp16)[name = tensor<string, []>("aw_chunk_93_cast_fp16")];
+            tensor<string, []> var_1050_equation_0 = const()[name = tensor<string, []>("op_1050_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1050_cast_fp16 = einsum(equation = var_1050_equation_0, values = (var_932_cast_fp16, var_907_cast_fp16))[name = tensor<string, []>("op_1050_cast_fp16")];
+            tensor<fp16, []> var_1051_to_fp16 = const()[name = tensor<string, []>("op_1051_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_95_cast_fp16 = mul(x = var_1050_cast_fp16, y = var_1051_to_fp16)[name = tensor<string, []>("aw_chunk_95_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1053_cast_fp16 = softmax(axis = var_662, x = aw_chunk_49_cast_fp16)[name = tensor<string, []>("op_1053_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1054_cast_fp16 = softmax(axis = var_662, x = aw_chunk_51_cast_fp16)[name = tensor<string, []>("op_1054_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1055_cast_fp16 = softmax(axis = var_662, x = aw_chunk_53_cast_fp16)[name = tensor<string, []>("op_1055_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1056_cast_fp16 = softmax(axis = var_662, x = aw_chunk_55_cast_fp16)[name = tensor<string, []>("op_1056_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1057_cast_fp16 = softmax(axis = var_662, x = aw_chunk_57_cast_fp16)[name = tensor<string, []>("op_1057_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1058_cast_fp16 = softmax(axis = var_662, x = aw_chunk_59_cast_fp16)[name = tensor<string, []>("op_1058_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1059_cast_fp16 = softmax(axis = var_662, x = aw_chunk_61_cast_fp16)[name = tensor<string, []>("op_1059_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1060_cast_fp16 = softmax(axis = var_662, x = aw_chunk_63_cast_fp16)[name = tensor<string, []>("op_1060_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1061_cast_fp16 = softmax(axis = var_662, x = aw_chunk_65_cast_fp16)[name = tensor<string, []>("op_1061_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1062_cast_fp16 = softmax(axis = var_662, x = aw_chunk_67_cast_fp16)[name = tensor<string, []>("op_1062_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1063_cast_fp16 = softmax(axis = var_662, x = aw_chunk_69_cast_fp16)[name = tensor<string, []>("op_1063_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1064_cast_fp16 = softmax(axis = var_662, x = aw_chunk_71_cast_fp16)[name = tensor<string, []>("op_1064_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1065_cast_fp16 = softmax(axis = var_662, x = aw_chunk_73_cast_fp16)[name = tensor<string, []>("op_1065_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1066_cast_fp16 = softmax(axis = var_662, x = aw_chunk_75_cast_fp16)[name = tensor<string, []>("op_1066_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1067_cast_fp16 = softmax(axis = var_662, x = aw_chunk_77_cast_fp16)[name = tensor<string, []>("op_1067_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1068_cast_fp16 = softmax(axis = var_662, x = aw_chunk_79_cast_fp16)[name = tensor<string, []>("op_1068_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1069_cast_fp16 = softmax(axis = var_662, x = aw_chunk_81_cast_fp16)[name = tensor<string, []>("op_1069_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1070_cast_fp16 = softmax(axis = var_662, x = aw_chunk_83_cast_fp16)[name = tensor<string, []>("op_1070_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1071_cast_fp16 = softmax(axis = var_662, x = aw_chunk_85_cast_fp16)[name = tensor<string, []>("op_1071_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1072_cast_fp16 = softmax(axis = var_662, x = aw_chunk_87_cast_fp16)[name = tensor<string, []>("op_1072_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1073_cast_fp16 = softmax(axis = var_662, x = aw_chunk_89_cast_fp16)[name = tensor<string, []>("op_1073_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1074_cast_fp16 = softmax(axis = var_662, x = aw_chunk_91_cast_fp16)[name = tensor<string, []>("op_1074_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1075_cast_fp16 = softmax(axis = var_662, x = aw_chunk_93_cast_fp16)[name = tensor<string, []>("op_1075_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1076_cast_fp16 = softmax(axis = var_662, x = aw_chunk_95_cast_fp16)[name = tensor<string, []>("op_1076_cast_fp16")];
+            tensor<string, []> var_1078_equation_0 = const()[name = tensor<string, []>("op_1078_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1078_cast_fp16 = einsum(equation = var_1078_equation_0, values = (var_934_cast_fp16, var_1053_cast_fp16))[name = tensor<string, []>("op_1078_cast_fp16")];
+            tensor<string, []> var_1080_equation_0 = const()[name = tensor<string, []>("op_1080_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1080_cast_fp16 = einsum(equation = var_1080_equation_0, values = (var_934_cast_fp16, var_1054_cast_fp16))[name = tensor<string, []>("op_1080_cast_fp16")];
+            tensor<string, []> var_1082_equation_0 = const()[name = tensor<string, []>("op_1082_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1082_cast_fp16 = einsum(equation = var_1082_equation_0, values = (var_934_cast_fp16, var_1055_cast_fp16))[name = tensor<string, []>("op_1082_cast_fp16")];
+            tensor<string, []> var_1084_equation_0 = const()[name = tensor<string, []>("op_1084_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1084_cast_fp16 = einsum(equation = var_1084_equation_0, values = (var_934_cast_fp16, var_1056_cast_fp16))[name = tensor<string, []>("op_1084_cast_fp16")];
+            tensor<string, []> var_1086_equation_0 = const()[name = tensor<string, []>("op_1086_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1086_cast_fp16 = einsum(equation = var_1086_equation_0, values = (var_938_cast_fp16, var_1057_cast_fp16))[name = tensor<string, []>("op_1086_cast_fp16")];
+            tensor<string, []> var_1088_equation_0 = const()[name = tensor<string, []>("op_1088_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1088_cast_fp16 = einsum(equation = var_1088_equation_0, values = (var_938_cast_fp16, var_1058_cast_fp16))[name = tensor<string, []>("op_1088_cast_fp16")];
+            tensor<string, []> var_1090_equation_0 = const()[name = tensor<string, []>("op_1090_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1090_cast_fp16 = einsum(equation = var_1090_equation_0, values = (var_938_cast_fp16, var_1059_cast_fp16))[name = tensor<string, []>("op_1090_cast_fp16")];
+            tensor<string, []> var_1092_equation_0 = const()[name = tensor<string, []>("op_1092_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1092_cast_fp16 = einsum(equation = var_1092_equation_0, values = (var_938_cast_fp16, var_1060_cast_fp16))[name = tensor<string, []>("op_1092_cast_fp16")];
+            tensor<string, []> var_1094_equation_0 = const()[name = tensor<string, []>("op_1094_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1094_cast_fp16 = einsum(equation = var_1094_equation_0, values = (var_942_cast_fp16, var_1061_cast_fp16))[name = tensor<string, []>("op_1094_cast_fp16")];
+            tensor<string, []> var_1096_equation_0 = const()[name = tensor<string, []>("op_1096_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1096_cast_fp16 = einsum(equation = var_1096_equation_0, values = (var_942_cast_fp16, var_1062_cast_fp16))[name = tensor<string, []>("op_1096_cast_fp16")];
+            tensor<string, []> var_1098_equation_0 = const()[name = tensor<string, []>("op_1098_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1098_cast_fp16 = einsum(equation = var_1098_equation_0, values = (var_942_cast_fp16, var_1063_cast_fp16))[name = tensor<string, []>("op_1098_cast_fp16")];
+            tensor<string, []> var_1100_equation_0 = const()[name = tensor<string, []>("op_1100_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1100_cast_fp16 = einsum(equation = var_1100_equation_0, values = (var_942_cast_fp16, var_1064_cast_fp16))[name = tensor<string, []>("op_1100_cast_fp16")];
+            tensor<string, []> var_1102_equation_0 = const()[name = tensor<string, []>("op_1102_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1102_cast_fp16 = einsum(equation = var_1102_equation_0, values = (var_946_cast_fp16, var_1065_cast_fp16))[name = tensor<string, []>("op_1102_cast_fp16")];
+            tensor<string, []> var_1104_equation_0 = const()[name = tensor<string, []>("op_1104_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1104_cast_fp16 = einsum(equation = var_1104_equation_0, values = (var_946_cast_fp16, var_1066_cast_fp16))[name = tensor<string, []>("op_1104_cast_fp16")];
+            tensor<string, []> var_1106_equation_0 = const()[name = tensor<string, []>("op_1106_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1106_cast_fp16 = einsum(equation = var_1106_equation_0, values = (var_946_cast_fp16, var_1067_cast_fp16))[name = tensor<string, []>("op_1106_cast_fp16")];
+            tensor<string, []> var_1108_equation_0 = const()[name = tensor<string, []>("op_1108_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1108_cast_fp16 = einsum(equation = var_1108_equation_0, values = (var_946_cast_fp16, var_1068_cast_fp16))[name = tensor<string, []>("op_1108_cast_fp16")];
+            tensor<string, []> var_1110_equation_0 = const()[name = tensor<string, []>("op_1110_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1110_cast_fp16 = einsum(equation = var_1110_equation_0, values = (var_950_cast_fp16, var_1069_cast_fp16))[name = tensor<string, []>("op_1110_cast_fp16")];
+            tensor<string, []> var_1112_equation_0 = const()[name = tensor<string, []>("op_1112_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1112_cast_fp16 = einsum(equation = var_1112_equation_0, values = (var_950_cast_fp16, var_1070_cast_fp16))[name = tensor<string, []>("op_1112_cast_fp16")];
+            tensor<string, []> var_1114_equation_0 = const()[name = tensor<string, []>("op_1114_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1114_cast_fp16 = einsum(equation = var_1114_equation_0, values = (var_950_cast_fp16, var_1071_cast_fp16))[name = tensor<string, []>("op_1114_cast_fp16")];
+            tensor<string, []> var_1116_equation_0 = const()[name = tensor<string, []>("op_1116_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1116_cast_fp16 = einsum(equation = var_1116_equation_0, values = (var_950_cast_fp16, var_1072_cast_fp16))[name = tensor<string, []>("op_1116_cast_fp16")];
+            tensor<string, []> var_1118_equation_0 = const()[name = tensor<string, []>("op_1118_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1118_cast_fp16 = einsum(equation = var_1118_equation_0, values = (var_954_cast_fp16, var_1073_cast_fp16))[name = tensor<string, []>("op_1118_cast_fp16")];
+            tensor<string, []> var_1120_equation_0 = const()[name = tensor<string, []>("op_1120_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1120_cast_fp16 = einsum(equation = var_1120_equation_0, values = (var_954_cast_fp16, var_1074_cast_fp16))[name = tensor<string, []>("op_1120_cast_fp16")];
+            tensor<string, []> var_1122_equation_0 = const()[name = tensor<string, []>("op_1122_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1122_cast_fp16 = einsum(equation = var_1122_equation_0, values = (var_954_cast_fp16, var_1075_cast_fp16))[name = tensor<string, []>("op_1122_cast_fp16")];
+            tensor<string, []> var_1124_equation_0 = const()[name = tensor<string, []>("op_1124_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1124_cast_fp16 = einsum(equation = var_1124_equation_0, values = (var_954_cast_fp16, var_1076_cast_fp16))[name = tensor<string, []>("op_1124_cast_fp16")];
+            tensor<bool, []> var_1126_interleave_0 = const()[name = tensor<string, []>("op_1126_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1126_cast_fp16 = concat(axis = var_651, interleave = var_1126_interleave_0, values = (var_1078_cast_fp16, var_1080_cast_fp16, var_1082_cast_fp16, var_1084_cast_fp16))[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<bool, []> var_1128_interleave_0 = const()[name = tensor<string, []>("op_1128_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1128_cast_fp16 = concat(axis = var_651, interleave = var_1128_interleave_0, values = (var_1086_cast_fp16, var_1088_cast_fp16, var_1090_cast_fp16, var_1092_cast_fp16))[name = tensor<string, []>("op_1128_cast_fp16")];
+            tensor<bool, []> var_1130_interleave_0 = const()[name = tensor<string, []>("op_1130_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1130_cast_fp16 = concat(axis = var_651, interleave = var_1130_interleave_0, values = (var_1094_cast_fp16, var_1096_cast_fp16, var_1098_cast_fp16, var_1100_cast_fp16))[name = tensor<string, []>("op_1130_cast_fp16")];
+            tensor<bool, []> var_1132_interleave_0 = const()[name = tensor<string, []>("op_1132_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1132_cast_fp16 = concat(axis = var_651, interleave = var_1132_interleave_0, values = (var_1102_cast_fp16, var_1104_cast_fp16, var_1106_cast_fp16, var_1108_cast_fp16))[name = tensor<string, []>("op_1132_cast_fp16")];
+            tensor<bool, []> var_1134_interleave_0 = const()[name = tensor<string, []>("op_1134_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1134_cast_fp16 = concat(axis = var_651, interleave = var_1134_interleave_0, values = (var_1110_cast_fp16, var_1112_cast_fp16, var_1114_cast_fp16, var_1116_cast_fp16))[name = tensor<string, []>("op_1134_cast_fp16")];
+            tensor<bool, []> var_1136_interleave_0 = const()[name = tensor<string, []>("op_1136_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1136_cast_fp16 = concat(axis = var_651, interleave = var_1136_interleave_0, values = (var_1118_cast_fp16, var_1120_cast_fp16, var_1122_cast_fp16, var_1124_cast_fp16))[name = tensor<string, []>("op_1136_cast_fp16")];
+            tensor<bool, []> input_9_interleave_0 = const()[name = tensor<string, []>("input_9_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_9_cast_fp16 = concat(axis = var_662, interleave = input_9_interleave_0, values = (var_1126_cast_fp16, var_1128_cast_fp16, var_1130_cast_fp16, var_1132_cast_fp16, var_1134_cast_fp16, var_1136_cast_fp16))[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<int32, [2]> var_1141 = const()[name = tensor<string, []>("op_1141"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1143 = const()[name = tensor<string, []>("op_1143"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_7_pad_type_0 = const()[name = tensor<string, []>("obj_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_7_pad_0 = const()[name = tensor<string, []>("obj_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6662016)))];
+            tensor<fp16, [384]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6956992)))];
+            tensor<fp16, [1, 384, 1, 1500]> obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_1143, groups = var_662, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_1141, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("obj_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> var_1149 = const()[name = tensor<string, []>("op_1149"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_7_cast_fp16 = reduce_mean(axes = var_1149, keep_dims = var_663, x = inputs_7_cast_fp16)[name = tensor<string, []>("channels_mean_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor<string, []>("zero_mean_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor<string, []>("zero_mean_sq_7_cast_fp16")];
+            tensor<int32, [1]> var_1153 = const()[name = tensor<string, []>("op_1153"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1154_cast_fp16 = reduce_mean(axes = var_1153, keep_dims = var_663, x = zero_mean_sq_7_cast_fp16)[name = tensor<string, []>("op_1154_cast_fp16")];
+            tensor<fp16, []> var_1155_to_fp16 = const()[name = tensor<string, []>("op_1155_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_1156_cast_fp16 = add(x = var_1154_cast_fp16, y = var_1155_to_fp16)[name = tensor<string, []>("op_1156_cast_fp16")];
+            tensor<fp16, []> denom_7_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_7_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_1156_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
+            tensor<fp16, [384]> input_11_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_11_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6957824)))];
+            tensor<fp16, [384]> input_11_beta_0_to_fp16 = const()[name = tensor<string, []>("input_11_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6958656)))];
+            tensor<fp16, []> input_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<int32, [2]> var_1167 = const()[name = tensor<string, []>("op_1167"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1169 = const()[name = tensor<string, []>("op_1169"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_13_pad_type_0 = const()[name = tensor<string, []>("input_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_13_pad_0 = const()[name = tensor<string, []>("input_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1536, 384, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6959488)))];
+            tensor<fp16, [1536]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8139200)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_1169, groups = var_662, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = var_1167, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> input_15_mode_0 = const()[name = tensor<string, []>("input_15_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<int32, [2]> var_1175 = const()[name = tensor<string, []>("op_1175"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1177 = const()[name = tensor<string, []>("op_1177"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_7_pad_type_0 = const()[name = tensor<string, []>("hidden_states_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = tensor<string, []>("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 1536, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8142336)))];
+            tensor<fp16, [384]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9322048)))];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_1177, groups = var_662, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_1175, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_1184 = const()[name = tensor<string, []>("op_1184"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1195 = const()[name = tensor<string, []>("op_1195"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1196 = const()[name = tensor<string, []>("op_1196"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_1206 = const()[name = tensor<string, []>("op_1206"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_9_cast_fp16 = reduce_mean(axes = var_1206, keep_dims = var_1196, x = inputs_9_cast_fp16)[name = tensor<string, []>("channels_mean_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_sq_9_cast_fp16")];
+            tensor<int32, [1]> var_1210 = const()[name = tensor<string, []>("op_1210"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1211_cast_fp16 = reduce_mean(axes = var_1210, keep_dims = var_1196, x = zero_mean_sq_9_cast_fp16)[name = tensor<string, []>("op_1211_cast_fp16")];
+            tensor<fp16, []> var_1212_to_fp16 = const()[name = tensor<string, []>("op_1212_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_1213_cast_fp16 = add(x = var_1211_cast_fp16, y = var_1212_to_fp16)[name = tensor<string, []>("op_1213_cast_fp16")];
+            tensor<fp16, []> denom_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_1213_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
+            tensor<fp16, [384]> obj_9_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9322880)))];
+            tensor<fp16, [384]> obj_9_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_9_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9323712)))];
+            tensor<fp16, []> obj_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_9_cast_fp16")];
+            tensor<int32, [2]> var_1228 = const()[name = tensor<string, []>("op_1228"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1230 = const()[name = tensor<string, []>("op_1230"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9324544)))];
+            tensor<fp16, [384]> layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9619520)))];
+            tensor<fp16, [1, 384, 1, 1500]> query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_1230, groups = var_1195, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_1228, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
+            tensor<int32, [2]> var_1234 = const()[name = tensor<string, []>("op_1234"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1236 = const()[name = tensor<string, []>("op_1236"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_5_pad_type_0 = const()[name = tensor<string, []>("key_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_5_pad_0 = const()[name = tensor<string, []>("key_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9620352)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_5_cast_fp16 = conv(dilations = var_1236, groups = var_1195, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = var_1234, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
+            tensor<int32, [2]> var_1241 = const()[name = tensor<string, []>("op_1241"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1243 = const()[name = tensor<string, []>("op_1243"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_5_pad_type_0 = const()[name = tensor<string, []>("value_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_5_pad_0 = const()[name = tensor<string, []>("value_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9915328)))];
+            tensor<fp16, [384]> layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10210304)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_1243, groups = var_1195, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = var_1241, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("value_5_cast_fp16")];
+            tensor<int32, [4]> var_1250_begin_0 = const()[name = tensor<string, []>("op_1250_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1250_end_0 = const()[name = tensor<string, []>("op_1250_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1250_end_mask_0 = const()[name = tensor<string, []>("op_1250_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1250_cast_fp16 = slice_by_index(begin = var_1250_begin_0, end = var_1250_end_0, end_mask = var_1250_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1250_cast_fp16")];
+            tensor<int32, [4]> var_1254_begin_0 = const()[name = tensor<string, []>("op_1254_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_1254_end_0 = const()[name = tensor<string, []>("op_1254_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_1254_end_mask_0 = const()[name = tensor<string, []>("op_1254_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1254_cast_fp16 = slice_by_index(begin = var_1254_begin_0, end = var_1254_end_0, end_mask = var_1254_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1254_cast_fp16")];
+            tensor<int32, [4]> var_1258_begin_0 = const()[name = tensor<string, []>("op_1258_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_1258_end_0 = const()[name = tensor<string, []>("op_1258_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_1258_end_mask_0 = const()[name = tensor<string, []>("op_1258_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1258_cast_fp16 = slice_by_index(begin = var_1258_begin_0, end = var_1258_end_0, end_mask = var_1258_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1258_cast_fp16")];
+            tensor<int32, [4]> var_1262_begin_0 = const()[name = tensor<string, []>("op_1262_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_1262_end_0 = const()[name = tensor<string, []>("op_1262_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_1262_end_mask_0 = const()[name = tensor<string, []>("op_1262_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1262_cast_fp16 = slice_by_index(begin = var_1262_begin_0, end = var_1262_end_0, end_mask = var_1262_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1262_cast_fp16")];
+            tensor<int32, [4]> var_1266_begin_0 = const()[name = tensor<string, []>("op_1266_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_1266_end_0 = const()[name = tensor<string, []>("op_1266_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_1266_end_mask_0 = const()[name = tensor<string, []>("op_1266_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1266_cast_fp16 = slice_by_index(begin = var_1266_begin_0, end = var_1266_end_0, end_mask = var_1266_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1266_cast_fp16")];
+            tensor<int32, [4]> var_1270_begin_0 = const()[name = tensor<string, []>("op_1270_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_1270_end_0 = const()[name = tensor<string, []>("op_1270_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_1270_end_mask_0 = const()[name = tensor<string, []>("op_1270_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1270_cast_fp16 = slice_by_index(begin = var_1270_begin_0, end = var_1270_end_0, end_mask = var_1270_end_mask_0, x = query_5_cast_fp16)[name = tensor<string, []>("op_1270_cast_fp16")];
+            tensor<int32, [4]> var_1279_begin_0 = const()[name = tensor<string, []>("op_1279_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1279_end_0 = const()[name = tensor<string, []>("op_1279_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1279_end_mask_0 = const()[name = tensor<string, []>("op_1279_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1279_cast_fp16 = slice_by_index(begin = var_1279_begin_0, end = var_1279_end_0, end_mask = var_1279_end_mask_0, x = var_1250_cast_fp16)[name = tensor<string, []>("op_1279_cast_fp16")];
+            tensor<int32, [4]> var_1286_begin_0 = const()[name = tensor<string, []>("op_1286_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1286_end_0 = const()[name = tensor<string, []>("op_1286_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1286_end_mask_0 = const()[name = tensor<string, []>("op_1286_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1286_cast_fp16 = slice_by_index(begin = var_1286_begin_0, end = var_1286_end_0, end_mask = var_1286_end_mask_0, x = var_1250_cast_fp16)[name = tensor<string, []>("op_1286_cast_fp16")];
+            tensor<int32, [4]> var_1293_begin_0 = const()[name = tensor<string, []>("op_1293_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1293_end_0 = const()[name = tensor<string, []>("op_1293_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1293_end_mask_0 = const()[name = tensor<string, []>("op_1293_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1293_cast_fp16 = slice_by_index(begin = var_1293_begin_0, end = var_1293_end_0, end_mask = var_1293_end_mask_0, x = var_1250_cast_fp16)[name = tensor<string, []>("op_1293_cast_fp16")];
+            tensor<int32, [4]> var_1300_begin_0 = const()[name = tensor<string, []>("op_1300_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1300_end_0 = const()[name = tensor<string, []>("op_1300_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1300_end_mask_0 = const()[name = tensor<string, []>("op_1300_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = var_1250_cast_fp16)[name = tensor<string, []>("op_1300_cast_fp16")];
+            tensor<int32, [4]> var_1307_begin_0 = const()[name = tensor<string, []>("op_1307_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1307_end_0 = const()[name = tensor<string, []>("op_1307_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1307_end_mask_0 = const()[name = tensor<string, []>("op_1307_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1307_cast_fp16 = slice_by_index(begin = var_1307_begin_0, end = var_1307_end_0, end_mask = var_1307_end_mask_0, x = var_1254_cast_fp16)[name = tensor<string, []>("op_1307_cast_fp16")];
+            tensor<int32, [4]> var_1314_begin_0 = const()[name = tensor<string, []>("op_1314_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1314_end_0 = const()[name = tensor<string, []>("op_1314_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1314_end_mask_0 = const()[name = tensor<string, []>("op_1314_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1314_cast_fp16 = slice_by_index(begin = var_1314_begin_0, end = var_1314_end_0, end_mask = var_1314_end_mask_0, x = var_1254_cast_fp16)[name = tensor<string, []>("op_1314_cast_fp16")];
+            tensor<int32, [4]> var_1321_begin_0 = const()[name = tensor<string, []>("op_1321_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1321_end_0 = const()[name = tensor<string, []>("op_1321_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1321_end_mask_0 = const()[name = tensor<string, []>("op_1321_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1321_cast_fp16 = slice_by_index(begin = var_1321_begin_0, end = var_1321_end_0, end_mask = var_1321_end_mask_0, x = var_1254_cast_fp16)[name = tensor<string, []>("op_1321_cast_fp16")];
+            tensor<int32, [4]> var_1328_begin_0 = const()[name = tensor<string, []>("op_1328_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1328_end_0 = const()[name = tensor<string, []>("op_1328_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1328_end_mask_0 = const()[name = tensor<string, []>("op_1328_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = var_1254_cast_fp16)[name = tensor<string, []>("op_1328_cast_fp16")];
+            tensor<int32, [4]> var_1335_begin_0 = const()[name = tensor<string, []>("op_1335_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1335_end_0 = const()[name = tensor<string, []>("op_1335_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1335_end_mask_0 = const()[name = tensor<string, []>("op_1335_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1335_cast_fp16 = slice_by_index(begin = var_1335_begin_0, end = var_1335_end_0, end_mask = var_1335_end_mask_0, x = var_1258_cast_fp16)[name = tensor<string, []>("op_1335_cast_fp16")];
+            tensor<int32, [4]> var_1342_begin_0 = const()[name = tensor<string, []>("op_1342_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1342_end_0 = const()[name = tensor<string, []>("op_1342_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1342_end_mask_0 = const()[name = tensor<string, []>("op_1342_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1342_cast_fp16 = slice_by_index(begin = var_1342_begin_0, end = var_1342_end_0, end_mask = var_1342_end_mask_0, x = var_1258_cast_fp16)[name = tensor<string, []>("op_1342_cast_fp16")];
+            tensor<int32, [4]> var_1349_begin_0 = const()[name = tensor<string, []>("op_1349_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1349_end_0 = const()[name = tensor<string, []>("op_1349_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1349_end_mask_0 = const()[name = tensor<string, []>("op_1349_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1349_cast_fp16 = slice_by_index(begin = var_1349_begin_0, end = var_1349_end_0, end_mask = var_1349_end_mask_0, x = var_1258_cast_fp16)[name = tensor<string, []>("op_1349_cast_fp16")];
+            tensor<int32, [4]> var_1356_begin_0 = const()[name = tensor<string, []>("op_1356_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1356_end_0 = const()[name = tensor<string, []>("op_1356_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1356_end_mask_0 = const()[name = tensor<string, []>("op_1356_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = var_1258_cast_fp16)[name = tensor<string, []>("op_1356_cast_fp16")];
+            tensor<int32, [4]> var_1363_begin_0 = const()[name = tensor<string, []>("op_1363_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1363_end_0 = const()[name = tensor<string, []>("op_1363_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1363_end_mask_0 = const()[name = tensor<string, []>("op_1363_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1363_cast_fp16 = slice_by_index(begin = var_1363_begin_0, end = var_1363_end_0, end_mask = var_1363_end_mask_0, x = var_1262_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<int32, [4]> var_1370_begin_0 = const()[name = tensor<string, []>("op_1370_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1370_end_0 = const()[name = tensor<string, []>("op_1370_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1370_end_mask_0 = const()[name = tensor<string, []>("op_1370_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1370_cast_fp16 = slice_by_index(begin = var_1370_begin_0, end = var_1370_end_0, end_mask = var_1370_end_mask_0, x = var_1262_cast_fp16)[name = tensor<string, []>("op_1370_cast_fp16")];
+            tensor<int32, [4]> var_1377_begin_0 = const()[name = tensor<string, []>("op_1377_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1377_end_0 = const()[name = tensor<string, []>("op_1377_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1377_end_mask_0 = const()[name = tensor<string, []>("op_1377_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1377_cast_fp16 = slice_by_index(begin = var_1377_begin_0, end = var_1377_end_0, end_mask = var_1377_end_mask_0, x = var_1262_cast_fp16)[name = tensor<string, []>("op_1377_cast_fp16")];
+            tensor<int32, [4]> var_1384_begin_0 = const()[name = tensor<string, []>("op_1384_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1384_end_0 = const()[name = tensor<string, []>("op_1384_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1384_end_mask_0 = const()[name = tensor<string, []>("op_1384_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1384_cast_fp16 = slice_by_index(begin = var_1384_begin_0, end = var_1384_end_0, end_mask = var_1384_end_mask_0, x = var_1262_cast_fp16)[name = tensor<string, []>("op_1384_cast_fp16")];
+            tensor<int32, [4]> var_1391_begin_0 = const()[name = tensor<string, []>("op_1391_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1391_end_0 = const()[name = tensor<string, []>("op_1391_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1391_end_mask_0 = const()[name = tensor<string, []>("op_1391_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1391_cast_fp16 = slice_by_index(begin = var_1391_begin_0, end = var_1391_end_0, end_mask = var_1391_end_mask_0, x = var_1266_cast_fp16)[name = tensor<string, []>("op_1391_cast_fp16")];
+            tensor<int32, [4]> var_1398_begin_0 = const()[name = tensor<string, []>("op_1398_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1398_end_0 = const()[name = tensor<string, []>("op_1398_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1398_end_mask_0 = const()[name = tensor<string, []>("op_1398_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = var_1266_cast_fp16)[name = tensor<string, []>("op_1398_cast_fp16")];
+            tensor<int32, [4]> var_1405_begin_0 = const()[name = tensor<string, []>("op_1405_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1405_end_0 = const()[name = tensor<string, []>("op_1405_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1405_end_mask_0 = const()[name = tensor<string, []>("op_1405_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1405_cast_fp16 = slice_by_index(begin = var_1405_begin_0, end = var_1405_end_0, end_mask = var_1405_end_mask_0, x = var_1266_cast_fp16)[name = tensor<string, []>("op_1405_cast_fp16")];
+            tensor<int32, [4]> var_1412_begin_0 = const()[name = tensor<string, []>("op_1412_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1412_end_0 = const()[name = tensor<string, []>("op_1412_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1412_end_mask_0 = const()[name = tensor<string, []>("op_1412_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1412_cast_fp16 = slice_by_index(begin = var_1412_begin_0, end = var_1412_end_0, end_mask = var_1412_end_mask_0, x = var_1266_cast_fp16)[name = tensor<string, []>("op_1412_cast_fp16")];
+            tensor<int32, [4]> var_1419_begin_0 = const()[name = tensor<string, []>("op_1419_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1419_end_0 = const()[name = tensor<string, []>("op_1419_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1419_end_mask_0 = const()[name = tensor<string, []>("op_1419_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1419_cast_fp16 = slice_by_index(begin = var_1419_begin_0, end = var_1419_end_0, end_mask = var_1419_end_mask_0, x = var_1270_cast_fp16)[name = tensor<string, []>("op_1419_cast_fp16")];
+            tensor<int32, [4]> var_1426_begin_0 = const()[name = tensor<string, []>("op_1426_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1426_end_0 = const()[name = tensor<string, []>("op_1426_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1426_end_mask_0 = const()[name = tensor<string, []>("op_1426_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = var_1270_cast_fp16)[name = tensor<string, []>("op_1426_cast_fp16")];
+            tensor<int32, [4]> var_1433_begin_0 = const()[name = tensor<string, []>("op_1433_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1433_end_0 = const()[name = tensor<string, []>("op_1433_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1433_end_mask_0 = const()[name = tensor<string, []>("op_1433_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1433_cast_fp16 = slice_by_index(begin = var_1433_begin_0, end = var_1433_end_0, end_mask = var_1433_end_mask_0, x = var_1270_cast_fp16)[name = tensor<string, []>("op_1433_cast_fp16")];
+            tensor<int32, [4]> var_1440_begin_0 = const()[name = tensor<string, []>("op_1440_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1440_end_0 = const()[name = tensor<string, []>("op_1440_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1440_end_mask_0 = const()[name = tensor<string, []>("op_1440_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1440_cast_fp16 = slice_by_index(begin = var_1440_begin_0, end = var_1440_end_0, end_mask = var_1440_end_mask_0, x = var_1270_cast_fp16)[name = tensor<string, []>("op_1440_cast_fp16")];
+            tensor<int32, [4]> k_5_perm_0 = const()[name = tensor<string, []>("k_5_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_1445_begin_0 = const()[name = tensor<string, []>("op_1445_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1445_end_0 = const()[name = tensor<string, []>("op_1445_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_1445_end_mask_0 = const()[name = tensor<string, []>("op_1445_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 384]> transpose_1 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1445_cast_fp16 = slice_by_index(begin = var_1445_begin_0, end = var_1445_end_0, end_mask = var_1445_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_1445_cast_fp16")];
+            tensor<int32, [4]> var_1449_begin_0 = const()[name = tensor<string, []>("op_1449_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_1449_end_0 = const()[name = tensor<string, []>("op_1449_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_1449_end_mask_0 = const()[name = tensor<string, []>("op_1449_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1449_cast_fp16 = slice_by_index(begin = var_1449_begin_0, end = var_1449_end_0, end_mask = var_1449_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_1449_cast_fp16")];
+            tensor<int32, [4]> var_1453_begin_0 = const()[name = tensor<string, []>("op_1453_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_1453_end_0 = const()[name = tensor<string, []>("op_1453_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_1453_end_mask_0 = const()[name = tensor<string, []>("op_1453_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1453_cast_fp16 = slice_by_index(begin = var_1453_begin_0, end = var_1453_end_0, end_mask = var_1453_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_1453_cast_fp16")];
+            tensor<int32, [4]> var_1457_begin_0 = const()[name = tensor<string, []>("op_1457_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_1457_end_0 = const()[name = tensor<string, []>("op_1457_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_1457_end_mask_0 = const()[name = tensor<string, []>("op_1457_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1457_cast_fp16 = slice_by_index(begin = var_1457_begin_0, end = var_1457_end_0, end_mask = var_1457_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_1457_cast_fp16")];
+            tensor<int32, [4]> var_1461_begin_0 = const()[name = tensor<string, []>("op_1461_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_1461_end_0 = const()[name = tensor<string, []>("op_1461_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_1461_end_mask_0 = const()[name = tensor<string, []>("op_1461_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1461_cast_fp16 = slice_by_index(begin = var_1461_begin_0, end = var_1461_end_0, end_mask = var_1461_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_1461_cast_fp16")];
+            tensor<int32, [4]> var_1465_begin_0 = const()[name = tensor<string, []>("op_1465_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_1465_end_0 = const()[name = tensor<string, []>("op_1465_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_1465_end_mask_0 = const()[name = tensor<string, []>("op_1465_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1465_cast_fp16 = slice_by_index(begin = var_1465_begin_0, end = var_1465_end_0, end_mask = var_1465_end_mask_0, x = transpose_1)[name = tensor<string, []>("op_1465_cast_fp16")];
+            tensor<int32, [4]> var_1467_begin_0 = const()[name = tensor<string, []>("op_1467_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1467_end_0 = const()[name = tensor<string, []>("op_1467_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1467_end_mask_0 = const()[name = tensor<string, []>("op_1467_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1467_cast_fp16 = slice_by_index(begin = var_1467_begin_0, end = var_1467_end_0, end_mask = var_1467_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1467_cast_fp16")];
+            tensor<int32, [4]> var_1471_begin_0 = const()[name = tensor<string, []>("op_1471_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_1471_end_0 = const()[name = tensor<string, []>("op_1471_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_1471_end_mask_0 = const()[name = tensor<string, []>("op_1471_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1471_cast_fp16 = slice_by_index(begin = var_1471_begin_0, end = var_1471_end_0, end_mask = var_1471_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1471_cast_fp16")];
+            tensor<int32, [4]> var_1475_begin_0 = const()[name = tensor<string, []>("op_1475_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_1475_end_0 = const()[name = tensor<string, []>("op_1475_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_1475_end_mask_0 = const()[name = tensor<string, []>("op_1475_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1475_cast_fp16 = slice_by_index(begin = var_1475_begin_0, end = var_1475_end_0, end_mask = var_1475_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1475_cast_fp16")];
+            tensor<int32, [4]> var_1479_begin_0 = const()[name = tensor<string, []>("op_1479_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_1479_end_0 = const()[name = tensor<string, []>("op_1479_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_1479_end_mask_0 = const()[name = tensor<string, []>("op_1479_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1479_cast_fp16 = slice_by_index(begin = var_1479_begin_0, end = var_1479_end_0, end_mask = var_1479_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1479_cast_fp16")];
+            tensor<int32, [4]> var_1483_begin_0 = const()[name = tensor<string, []>("op_1483_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_1483_end_0 = const()[name = tensor<string, []>("op_1483_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_1483_end_mask_0 = const()[name = tensor<string, []>("op_1483_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1483_cast_fp16 = slice_by_index(begin = var_1483_begin_0, end = var_1483_end_0, end_mask = var_1483_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1483_cast_fp16")];
+            tensor<int32, [4]> var_1487_begin_0 = const()[name = tensor<string, []>("op_1487_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_1487_end_0 = const()[name = tensor<string, []>("op_1487_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_1487_end_mask_0 = const()[name = tensor<string, []>("op_1487_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1487_cast_fp16 = slice_by_index(begin = var_1487_begin_0, end = var_1487_end_0, end_mask = var_1487_end_mask_0, x = value_5_cast_fp16)[name = tensor<string, []>("op_1487_cast_fp16")];
+            tensor<string, []> var_1491_equation_0 = const()[name = tensor<string, []>("op_1491_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1491_cast_fp16 = einsum(equation = var_1491_equation_0, values = (var_1445_cast_fp16, var_1279_cast_fp16))[name = tensor<string, []>("op_1491_cast_fp16")];
+            tensor<fp16, []> var_1492_to_fp16 = const()[name = tensor<string, []>("op_1492_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_97_cast_fp16 = mul(x = var_1491_cast_fp16, y = var_1492_to_fp16)[name = tensor<string, []>("aw_chunk_97_cast_fp16")];
+            tensor<string, []> var_1495_equation_0 = const()[name = tensor<string, []>("op_1495_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1495_cast_fp16 = einsum(equation = var_1495_equation_0, values = (var_1445_cast_fp16, var_1286_cast_fp16))[name = tensor<string, []>("op_1495_cast_fp16")];
+            tensor<fp16, []> var_1496_to_fp16 = const()[name = tensor<string, []>("op_1496_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_99_cast_fp16 = mul(x = var_1495_cast_fp16, y = var_1496_to_fp16)[name = tensor<string, []>("aw_chunk_99_cast_fp16")];
+            tensor<string, []> var_1499_equation_0 = const()[name = tensor<string, []>("op_1499_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1499_cast_fp16 = einsum(equation = var_1499_equation_0, values = (var_1445_cast_fp16, var_1293_cast_fp16))[name = tensor<string, []>("op_1499_cast_fp16")];
+            tensor<fp16, []> var_1500_to_fp16 = const()[name = tensor<string, []>("op_1500_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_101_cast_fp16 = mul(x = var_1499_cast_fp16, y = var_1500_to_fp16)[name = tensor<string, []>("aw_chunk_101_cast_fp16")];
+            tensor<string, []> var_1503_equation_0 = const()[name = tensor<string, []>("op_1503_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1503_cast_fp16 = einsum(equation = var_1503_equation_0, values = (var_1445_cast_fp16, var_1300_cast_fp16))[name = tensor<string, []>("op_1503_cast_fp16")];
+            tensor<fp16, []> var_1504_to_fp16 = const()[name = tensor<string, []>("op_1504_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_103_cast_fp16 = mul(x = var_1503_cast_fp16, y = var_1504_to_fp16)[name = tensor<string, []>("aw_chunk_103_cast_fp16")];
+            tensor<string, []> var_1507_equation_0 = const()[name = tensor<string, []>("op_1507_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1507_cast_fp16 = einsum(equation = var_1507_equation_0, values = (var_1449_cast_fp16, var_1307_cast_fp16))[name = tensor<string, []>("op_1507_cast_fp16")];
+            tensor<fp16, []> var_1508_to_fp16 = const()[name = tensor<string, []>("op_1508_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_105_cast_fp16 = mul(x = var_1507_cast_fp16, y = var_1508_to_fp16)[name = tensor<string, []>("aw_chunk_105_cast_fp16")];
+            tensor<string, []> var_1511_equation_0 = const()[name = tensor<string, []>("op_1511_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1511_cast_fp16 = einsum(equation = var_1511_equation_0, values = (var_1449_cast_fp16, var_1314_cast_fp16))[name = tensor<string, []>("op_1511_cast_fp16")];
+            tensor<fp16, []> var_1512_to_fp16 = const()[name = tensor<string, []>("op_1512_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_107_cast_fp16 = mul(x = var_1511_cast_fp16, y = var_1512_to_fp16)[name = tensor<string, []>("aw_chunk_107_cast_fp16")];
+            tensor<string, []> var_1515_equation_0 = const()[name = tensor<string, []>("op_1515_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1515_cast_fp16 = einsum(equation = var_1515_equation_0, values = (var_1449_cast_fp16, var_1321_cast_fp16))[name = tensor<string, []>("op_1515_cast_fp16")];
+            tensor<fp16, []> var_1516_to_fp16 = const()[name = tensor<string, []>("op_1516_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_109_cast_fp16 = mul(x = var_1515_cast_fp16, y = var_1516_to_fp16)[name = tensor<string, []>("aw_chunk_109_cast_fp16")];
+            tensor<string, []> var_1519_equation_0 = const()[name = tensor<string, []>("op_1519_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1519_cast_fp16 = einsum(equation = var_1519_equation_0, values = (var_1449_cast_fp16, var_1328_cast_fp16))[name = tensor<string, []>("op_1519_cast_fp16")];
+            tensor<fp16, []> var_1520_to_fp16 = const()[name = tensor<string, []>("op_1520_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_111_cast_fp16 = mul(x = var_1519_cast_fp16, y = var_1520_to_fp16)[name = tensor<string, []>("aw_chunk_111_cast_fp16")];
+            tensor<string, []> var_1523_equation_0 = const()[name = tensor<string, []>("op_1523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1523_cast_fp16 = einsum(equation = var_1523_equation_0, values = (var_1453_cast_fp16, var_1335_cast_fp16))[name = tensor<string, []>("op_1523_cast_fp16")];
+            tensor<fp16, []> var_1524_to_fp16 = const()[name = tensor<string, []>("op_1524_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_113_cast_fp16 = mul(x = var_1523_cast_fp16, y = var_1524_to_fp16)[name = tensor<string, []>("aw_chunk_113_cast_fp16")];
+            tensor<string, []> var_1527_equation_0 = const()[name = tensor<string, []>("op_1527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1527_cast_fp16 = einsum(equation = var_1527_equation_0, values = (var_1453_cast_fp16, var_1342_cast_fp16))[name = tensor<string, []>("op_1527_cast_fp16")];
+            tensor<fp16, []> var_1528_to_fp16 = const()[name = tensor<string, []>("op_1528_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_115_cast_fp16 = mul(x = var_1527_cast_fp16, y = var_1528_to_fp16)[name = tensor<string, []>("aw_chunk_115_cast_fp16")];
+            tensor<string, []> var_1531_equation_0 = const()[name = tensor<string, []>("op_1531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1531_cast_fp16 = einsum(equation = var_1531_equation_0, values = (var_1453_cast_fp16, var_1349_cast_fp16))[name = tensor<string, []>("op_1531_cast_fp16")];
+            tensor<fp16, []> var_1532_to_fp16 = const()[name = tensor<string, []>("op_1532_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_117_cast_fp16 = mul(x = var_1531_cast_fp16, y = var_1532_to_fp16)[name = tensor<string, []>("aw_chunk_117_cast_fp16")];
+            tensor<string, []> var_1535_equation_0 = const()[name = tensor<string, []>("op_1535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1535_cast_fp16 = einsum(equation = var_1535_equation_0, values = (var_1453_cast_fp16, var_1356_cast_fp16))[name = tensor<string, []>("op_1535_cast_fp16")];
+            tensor<fp16, []> var_1536_to_fp16 = const()[name = tensor<string, []>("op_1536_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_119_cast_fp16 = mul(x = var_1535_cast_fp16, y = var_1536_to_fp16)[name = tensor<string, []>("aw_chunk_119_cast_fp16")];
+            tensor<string, []> var_1539_equation_0 = const()[name = tensor<string, []>("op_1539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1539_cast_fp16 = einsum(equation = var_1539_equation_0, values = (var_1457_cast_fp16, var_1363_cast_fp16))[name = tensor<string, []>("op_1539_cast_fp16")];
+            tensor<fp16, []> var_1540_to_fp16 = const()[name = tensor<string, []>("op_1540_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_121_cast_fp16 = mul(x = var_1539_cast_fp16, y = var_1540_to_fp16)[name = tensor<string, []>("aw_chunk_121_cast_fp16")];
+            tensor<string, []> var_1543_equation_0 = const()[name = tensor<string, []>("op_1543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1543_cast_fp16 = einsum(equation = var_1543_equation_0, values = (var_1457_cast_fp16, var_1370_cast_fp16))[name = tensor<string, []>("op_1543_cast_fp16")];
+            tensor<fp16, []> var_1544_to_fp16 = const()[name = tensor<string, []>("op_1544_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_123_cast_fp16 = mul(x = var_1543_cast_fp16, y = var_1544_to_fp16)[name = tensor<string, []>("aw_chunk_123_cast_fp16")];
+            tensor<string, []> var_1547_equation_0 = const()[name = tensor<string, []>("op_1547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1547_cast_fp16 = einsum(equation = var_1547_equation_0, values = (var_1457_cast_fp16, var_1377_cast_fp16))[name = tensor<string, []>("op_1547_cast_fp16")];
+            tensor<fp16, []> var_1548_to_fp16 = const()[name = tensor<string, []>("op_1548_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_125_cast_fp16 = mul(x = var_1547_cast_fp16, y = var_1548_to_fp16)[name = tensor<string, []>("aw_chunk_125_cast_fp16")];
+            tensor<string, []> var_1551_equation_0 = const()[name = tensor<string, []>("op_1551_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1551_cast_fp16 = einsum(equation = var_1551_equation_0, values = (var_1457_cast_fp16, var_1384_cast_fp16))[name = tensor<string, []>("op_1551_cast_fp16")];
+            tensor<fp16, []> var_1552_to_fp16 = const()[name = tensor<string, []>("op_1552_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_127_cast_fp16 = mul(x = var_1551_cast_fp16, y = var_1552_to_fp16)[name = tensor<string, []>("aw_chunk_127_cast_fp16")];
+            tensor<string, []> var_1555_equation_0 = const()[name = tensor<string, []>("op_1555_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1555_cast_fp16 = einsum(equation = var_1555_equation_0, values = (var_1461_cast_fp16, var_1391_cast_fp16))[name = tensor<string, []>("op_1555_cast_fp16")];
+            tensor<fp16, []> var_1556_to_fp16 = const()[name = tensor<string, []>("op_1556_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_129_cast_fp16 = mul(x = var_1555_cast_fp16, y = var_1556_to_fp16)[name = tensor<string, []>("aw_chunk_129_cast_fp16")];
+            tensor<string, []> var_1559_equation_0 = const()[name = tensor<string, []>("op_1559_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1559_cast_fp16 = einsum(equation = var_1559_equation_0, values = (var_1461_cast_fp16, var_1398_cast_fp16))[name = tensor<string, []>("op_1559_cast_fp16")];
+            tensor<fp16, []> var_1560_to_fp16 = const()[name = tensor<string, []>("op_1560_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_131_cast_fp16 = mul(x = var_1559_cast_fp16, y = var_1560_to_fp16)[name = tensor<string, []>("aw_chunk_131_cast_fp16")];
+            tensor<string, []> var_1563_equation_0 = const()[name = tensor<string, []>("op_1563_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1563_cast_fp16 = einsum(equation = var_1563_equation_0, values = (var_1461_cast_fp16, var_1405_cast_fp16))[name = tensor<string, []>("op_1563_cast_fp16")];
+            tensor<fp16, []> var_1564_to_fp16 = const()[name = tensor<string, []>("op_1564_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_133_cast_fp16 = mul(x = var_1563_cast_fp16, y = var_1564_to_fp16)[name = tensor<string, []>("aw_chunk_133_cast_fp16")];
+            tensor<string, []> var_1567_equation_0 = const()[name = tensor<string, []>("op_1567_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1567_cast_fp16 = einsum(equation = var_1567_equation_0, values = (var_1461_cast_fp16, var_1412_cast_fp16))[name = tensor<string, []>("op_1567_cast_fp16")];
+            tensor<fp16, []> var_1568_to_fp16 = const()[name = tensor<string, []>("op_1568_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_135_cast_fp16 = mul(x = var_1567_cast_fp16, y = var_1568_to_fp16)[name = tensor<string, []>("aw_chunk_135_cast_fp16")];
+            tensor<string, []> var_1571_equation_0 = const()[name = tensor<string, []>("op_1571_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1571_cast_fp16 = einsum(equation = var_1571_equation_0, values = (var_1465_cast_fp16, var_1419_cast_fp16))[name = tensor<string, []>("op_1571_cast_fp16")];
+            tensor<fp16, []> var_1572_to_fp16 = const()[name = tensor<string, []>("op_1572_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_137_cast_fp16 = mul(x = var_1571_cast_fp16, y = var_1572_to_fp16)[name = tensor<string, []>("aw_chunk_137_cast_fp16")];
+            tensor<string, []> var_1575_equation_0 = const()[name = tensor<string, []>("op_1575_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1575_cast_fp16 = einsum(equation = var_1575_equation_0, values = (var_1465_cast_fp16, var_1426_cast_fp16))[name = tensor<string, []>("op_1575_cast_fp16")];
+            tensor<fp16, []> var_1576_to_fp16 = const()[name = tensor<string, []>("op_1576_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_139_cast_fp16 = mul(x = var_1575_cast_fp16, y = var_1576_to_fp16)[name = tensor<string, []>("aw_chunk_139_cast_fp16")];
+            tensor<string, []> var_1579_equation_0 = const()[name = tensor<string, []>("op_1579_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1579_cast_fp16 = einsum(equation = var_1579_equation_0, values = (var_1465_cast_fp16, var_1433_cast_fp16))[name = tensor<string, []>("op_1579_cast_fp16")];
+            tensor<fp16, []> var_1580_to_fp16 = const()[name = tensor<string, []>("op_1580_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_141_cast_fp16 = mul(x = var_1579_cast_fp16, y = var_1580_to_fp16)[name = tensor<string, []>("aw_chunk_141_cast_fp16")];
+            tensor<string, []> var_1583_equation_0 = const()[name = tensor<string, []>("op_1583_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1583_cast_fp16 = einsum(equation = var_1583_equation_0, values = (var_1465_cast_fp16, var_1440_cast_fp16))[name = tensor<string, []>("op_1583_cast_fp16")];
+            tensor<fp16, []> var_1584_to_fp16 = const()[name = tensor<string, []>("op_1584_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_143_cast_fp16 = mul(x = var_1583_cast_fp16, y = var_1584_to_fp16)[name = tensor<string, []>("aw_chunk_143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1586_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_97_cast_fp16)[name = tensor<string, []>("op_1586_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1587_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_99_cast_fp16)[name = tensor<string, []>("op_1587_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1588_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_101_cast_fp16)[name = tensor<string, []>("op_1588_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1589_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_103_cast_fp16)[name = tensor<string, []>("op_1589_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1590_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_105_cast_fp16)[name = tensor<string, []>("op_1590_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1591_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_107_cast_fp16)[name = tensor<string, []>("op_1591_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1592_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_109_cast_fp16)[name = tensor<string, []>("op_1592_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1593_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_111_cast_fp16)[name = tensor<string, []>("op_1593_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1594_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_113_cast_fp16)[name = tensor<string, []>("op_1594_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1595_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_115_cast_fp16)[name = tensor<string, []>("op_1595_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1596_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_117_cast_fp16)[name = tensor<string, []>("op_1596_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1597_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_119_cast_fp16)[name = tensor<string, []>("op_1597_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1598_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_121_cast_fp16)[name = tensor<string, []>("op_1598_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1599_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_123_cast_fp16)[name = tensor<string, []>("op_1599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1600_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_125_cast_fp16)[name = tensor<string, []>("op_1600_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1601_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_127_cast_fp16)[name = tensor<string, []>("op_1601_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1602_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_129_cast_fp16)[name = tensor<string, []>("op_1602_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1603_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_131_cast_fp16)[name = tensor<string, []>("op_1603_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1604_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_133_cast_fp16)[name = tensor<string, []>("op_1604_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1605_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_135_cast_fp16)[name = tensor<string, []>("op_1605_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1606_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_137_cast_fp16)[name = tensor<string, []>("op_1606_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1607_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_139_cast_fp16)[name = tensor<string, []>("op_1607_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1608_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_141_cast_fp16)[name = tensor<string, []>("op_1608_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_1609_cast_fp16 = softmax(axis = var_1195, x = aw_chunk_143_cast_fp16)[name = tensor<string, []>("op_1609_cast_fp16")];
+            tensor<string, []> var_1611_equation_0 = const()[name = tensor<string, []>("op_1611_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1611_cast_fp16 = einsum(equation = var_1611_equation_0, values = (var_1467_cast_fp16, var_1586_cast_fp16))[name = tensor<string, []>("op_1611_cast_fp16")];
+            tensor<string, []> var_1613_equation_0 = const()[name = tensor<string, []>("op_1613_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1613_cast_fp16 = einsum(equation = var_1613_equation_0, values = (var_1467_cast_fp16, var_1587_cast_fp16))[name = tensor<string, []>("op_1613_cast_fp16")];
+            tensor<string, []> var_1615_equation_0 = const()[name = tensor<string, []>("op_1615_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1615_cast_fp16 = einsum(equation = var_1615_equation_0, values = (var_1467_cast_fp16, var_1588_cast_fp16))[name = tensor<string, []>("op_1615_cast_fp16")];
+            tensor<string, []> var_1617_equation_0 = const()[name = tensor<string, []>("op_1617_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1617_cast_fp16 = einsum(equation = var_1617_equation_0, values = (var_1467_cast_fp16, var_1589_cast_fp16))[name = tensor<string, []>("op_1617_cast_fp16")];
+            tensor<string, []> var_1619_equation_0 = const()[name = tensor<string, []>("op_1619_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1619_cast_fp16 = einsum(equation = var_1619_equation_0, values = (var_1471_cast_fp16, var_1590_cast_fp16))[name = tensor<string, []>("op_1619_cast_fp16")];
+            tensor<string, []> var_1621_equation_0 = const()[name = tensor<string, []>("op_1621_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1621_cast_fp16 = einsum(equation = var_1621_equation_0, values = (var_1471_cast_fp16, var_1591_cast_fp16))[name = tensor<string, []>("op_1621_cast_fp16")];
+            tensor<string, []> var_1623_equation_0 = const()[name = tensor<string, []>("op_1623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1623_cast_fp16 = einsum(equation = var_1623_equation_0, values = (var_1471_cast_fp16, var_1592_cast_fp16))[name = tensor<string, []>("op_1623_cast_fp16")];
+            tensor<string, []> var_1625_equation_0 = const()[name = tensor<string, []>("op_1625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1625_cast_fp16 = einsum(equation = var_1625_equation_0, values = (var_1471_cast_fp16, var_1593_cast_fp16))[name = tensor<string, []>("op_1625_cast_fp16")];
+            tensor<string, []> var_1627_equation_0 = const()[name = tensor<string, []>("op_1627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1627_cast_fp16 = einsum(equation = var_1627_equation_0, values = (var_1475_cast_fp16, var_1594_cast_fp16))[name = tensor<string, []>("op_1627_cast_fp16")];
+            tensor<string, []> var_1629_equation_0 = const()[name = tensor<string, []>("op_1629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1629_cast_fp16 = einsum(equation = var_1629_equation_0, values = (var_1475_cast_fp16, var_1595_cast_fp16))[name = tensor<string, []>("op_1629_cast_fp16")];
+            tensor<string, []> var_1631_equation_0 = const()[name = tensor<string, []>("op_1631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1631_cast_fp16 = einsum(equation = var_1631_equation_0, values = (var_1475_cast_fp16, var_1596_cast_fp16))[name = tensor<string, []>("op_1631_cast_fp16")];
+            tensor<string, []> var_1633_equation_0 = const()[name = tensor<string, []>("op_1633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1633_cast_fp16 = einsum(equation = var_1633_equation_0, values = (var_1475_cast_fp16, var_1597_cast_fp16))[name = tensor<string, []>("op_1633_cast_fp16")];
+            tensor<string, []> var_1635_equation_0 = const()[name = tensor<string, []>("op_1635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1635_cast_fp16 = einsum(equation = var_1635_equation_0, values = (var_1479_cast_fp16, var_1598_cast_fp16))[name = tensor<string, []>("op_1635_cast_fp16")];
+            tensor<string, []> var_1637_equation_0 = const()[name = tensor<string, []>("op_1637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1637_cast_fp16 = einsum(equation = var_1637_equation_0, values = (var_1479_cast_fp16, var_1599_cast_fp16))[name = tensor<string, []>("op_1637_cast_fp16")];
+            tensor<string, []> var_1639_equation_0 = const()[name = tensor<string, []>("op_1639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1639_cast_fp16 = einsum(equation = var_1639_equation_0, values = (var_1479_cast_fp16, var_1600_cast_fp16))[name = tensor<string, []>("op_1639_cast_fp16")];
+            tensor<string, []> var_1641_equation_0 = const()[name = tensor<string, []>("op_1641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1641_cast_fp16 = einsum(equation = var_1641_equation_0, values = (var_1479_cast_fp16, var_1601_cast_fp16))[name = tensor<string, []>("op_1641_cast_fp16")];
+            tensor<string, []> var_1643_equation_0 = const()[name = tensor<string, []>("op_1643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1643_cast_fp16 = einsum(equation = var_1643_equation_0, values = (var_1483_cast_fp16, var_1602_cast_fp16))[name = tensor<string, []>("op_1643_cast_fp16")];
+            tensor<string, []> var_1645_equation_0 = const()[name = tensor<string, []>("op_1645_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1645_cast_fp16 = einsum(equation = var_1645_equation_0, values = (var_1483_cast_fp16, var_1603_cast_fp16))[name = tensor<string, []>("op_1645_cast_fp16")];
+            tensor<string, []> var_1647_equation_0 = const()[name = tensor<string, []>("op_1647_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1647_cast_fp16 = einsum(equation = var_1647_equation_0, values = (var_1483_cast_fp16, var_1604_cast_fp16))[name = tensor<string, []>("op_1647_cast_fp16")];
+            tensor<string, []> var_1649_equation_0 = const()[name = tensor<string, []>("op_1649_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1649_cast_fp16 = einsum(equation = var_1649_equation_0, values = (var_1483_cast_fp16, var_1605_cast_fp16))[name = tensor<string, []>("op_1649_cast_fp16")];
+            tensor<string, []> var_1651_equation_0 = const()[name = tensor<string, []>("op_1651_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1651_cast_fp16 = einsum(equation = var_1651_equation_0, values = (var_1487_cast_fp16, var_1606_cast_fp16))[name = tensor<string, []>("op_1651_cast_fp16")];
+            tensor<string, []> var_1653_equation_0 = const()[name = tensor<string, []>("op_1653_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1653_cast_fp16 = einsum(equation = var_1653_equation_0, values = (var_1487_cast_fp16, var_1607_cast_fp16))[name = tensor<string, []>("op_1653_cast_fp16")];
+            tensor<string, []> var_1655_equation_0 = const()[name = tensor<string, []>("op_1655_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1655_cast_fp16 = einsum(equation = var_1655_equation_0, values = (var_1487_cast_fp16, var_1608_cast_fp16))[name = tensor<string, []>("op_1655_cast_fp16")];
+            tensor<string, []> var_1657_equation_0 = const()[name = tensor<string, []>("op_1657_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_1657_cast_fp16 = einsum(equation = var_1657_equation_0, values = (var_1487_cast_fp16, var_1609_cast_fp16))[name = tensor<string, []>("op_1657_cast_fp16")];
+            tensor<bool, []> var_1659_interleave_0 = const()[name = tensor<string, []>("op_1659_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1659_cast_fp16 = concat(axis = var_1184, interleave = var_1659_interleave_0, values = (var_1611_cast_fp16, var_1613_cast_fp16, var_1615_cast_fp16, var_1617_cast_fp16))[name = tensor<string, []>("op_1659_cast_fp16")];
+            tensor<bool, []> var_1661_interleave_0 = const()[name = tensor<string, []>("op_1661_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1661_cast_fp16 = concat(axis = var_1184, interleave = var_1661_interleave_0, values = (var_1619_cast_fp16, var_1621_cast_fp16, var_1623_cast_fp16, var_1625_cast_fp16))[name = tensor<string, []>("op_1661_cast_fp16")];
+            tensor<bool, []> var_1663_interleave_0 = const()[name = tensor<string, []>("op_1663_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1663_cast_fp16 = concat(axis = var_1184, interleave = var_1663_interleave_0, values = (var_1627_cast_fp16, var_1629_cast_fp16, var_1631_cast_fp16, var_1633_cast_fp16))[name = tensor<string, []>("op_1663_cast_fp16")];
+            tensor<bool, []> var_1665_interleave_0 = const()[name = tensor<string, []>("op_1665_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1665_cast_fp16 = concat(axis = var_1184, interleave = var_1665_interleave_0, values = (var_1635_cast_fp16, var_1637_cast_fp16, var_1639_cast_fp16, var_1641_cast_fp16))[name = tensor<string, []>("op_1665_cast_fp16")];
+            tensor<bool, []> var_1667_interleave_0 = const()[name = tensor<string, []>("op_1667_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1667_cast_fp16 = concat(axis = var_1184, interleave = var_1667_interleave_0, values = (var_1643_cast_fp16, var_1645_cast_fp16, var_1647_cast_fp16, var_1649_cast_fp16))[name = tensor<string, []>("op_1667_cast_fp16")];
+            tensor<bool, []> var_1669_interleave_0 = const()[name = tensor<string, []>("op_1669_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1669_cast_fp16 = concat(axis = var_1184, interleave = var_1669_interleave_0, values = (var_1651_cast_fp16, var_1653_cast_fp16, var_1655_cast_fp16, var_1657_cast_fp16))[name = tensor<string, []>("op_1669_cast_fp16")];
+            tensor<bool, []> input_17_interleave_0 = const()[name = tensor<string, []>("input_17_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_17_cast_fp16 = concat(axis = var_1195, interleave = input_17_interleave_0, values = (var_1659_cast_fp16, var_1661_cast_fp16, var_1663_cast_fp16, var_1665_cast_fp16, var_1667_cast_fp16, var_1669_cast_fp16))[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<int32, [2]> var_1674 = const()[name = tensor<string, []>("op_1674"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1676 = const()[name = tensor<string, []>("op_1676"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_11_pad_type_0 = const()[name = tensor<string, []>("obj_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_11_pad_0 = const()[name = tensor<string, []>("obj_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10211136)))];
+            tensor<fp16, [384]> layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10506112)))];
+            tensor<fp16, [1, 384, 1, 1500]> obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_1676, groups = var_1195, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_1674, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("obj_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> var_1682 = const()[name = tensor<string, []>("op_1682"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_11_cast_fp16 = reduce_mean(axes = var_1682, keep_dims = var_1196, x = inputs_11_cast_fp16)[name = tensor<string, []>("channels_mean_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_sq_11_cast_fp16")];
+            tensor<int32, [1]> var_1686 = const()[name = tensor<string, []>("op_1686"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1687_cast_fp16 = reduce_mean(axes = var_1686, keep_dims = var_1196, x = zero_mean_sq_11_cast_fp16)[name = tensor<string, []>("op_1687_cast_fp16")];
+            tensor<fp16, []> var_1688_to_fp16 = const()[name = tensor<string, []>("op_1688_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_1689_cast_fp16 = add(x = var_1687_cast_fp16, y = var_1688_to_fp16)[name = tensor<string, []>("op_1689_cast_fp16")];
+            tensor<fp16, []> denom_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_1689_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
+            tensor<fp16, [384]> input_19_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_19_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10506944)))];
+            tensor<fp16, [384]> input_19_beta_0_to_fp16 = const()[name = tensor<string, []>("input_19_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10507776)))];
+            tensor<fp16, []> input_19_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_19_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<int32, [2]> var_1700 = const()[name = tensor<string, []>("op_1700"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1702 = const()[name = tensor<string, []>("op_1702"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_21_pad_type_0 = const()[name = tensor<string, []>("input_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_21_pad_0 = const()[name = tensor<string, []>("input_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1536, 384, 1, 1]> layers_2_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10508608)))];
+            tensor<fp16, [1536]> layers_2_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11688320)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_1702, groups = var_1195, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = var_1700, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> input_23_mode_0 = const()[name = tensor<string, []>("input_23_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<int32, [2]> var_1708 = const()[name = tensor<string, []>("op_1708"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1710 = const()[name = tensor<string, []>("op_1710"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_9_pad_type_0 = const()[name = tensor<string, []>("hidden_states_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_9_pad_0 = const()[name = tensor<string, []>("hidden_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 1536, 1, 1]> layers_2_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11691456)))];
+            tensor<fp16, [384]> layers_2_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12871168)))];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_1710, groups = var_1195, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_1708, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_1717 = const()[name = tensor<string, []>("op_1717"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_1728 = const()[name = tensor<string, []>("op_1728"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1729 = const()[name = tensor<string, []>("op_1729"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_1739 = const()[name = tensor<string, []>("op_1739"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_13_cast_fp16 = reduce_mean(axes = var_1739, keep_dims = var_1729, x = inputs_13_cast_fp16)[name = tensor<string, []>("channels_mean_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor<string, []>("zero_mean_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor<string, []>("zero_mean_sq_13_cast_fp16")];
+            tensor<int32, [1]> var_1743 = const()[name = tensor<string, []>("op_1743"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1744_cast_fp16 = reduce_mean(axes = var_1743, keep_dims = var_1729, x = zero_mean_sq_13_cast_fp16)[name = tensor<string, []>("op_1744_cast_fp16")];
+            tensor<fp16, []> var_1745_to_fp16 = const()[name = tensor<string, []>("op_1745_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_1746_cast_fp16 = add(x = var_1744_cast_fp16, y = var_1745_to_fp16)[name = tensor<string, []>("op_1746_cast_fp16")];
+            tensor<fp16, []> denom_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_1746_cast_fp16)[name = tensor<string, []>("denom_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor<string, []>("out_13_cast_fp16")];
+            tensor<fp16, [384]> obj_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12872000)))];
+            tensor<fp16, [384]> obj_13_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_13_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12872832)))];
+            tensor<fp16, []> obj_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
+            tensor<int32, [2]> var_1761 = const()[name = tensor<string, []>("op_1761"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1763 = const()[name = tensor<string, []>("op_1763"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12873664)))];
+            tensor<fp16, [384]> layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13168640)))];
+            tensor<fp16, [1, 384, 1, 1500]> query_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_1763, groups = var_1728, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_1761, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
+            tensor<int32, [2]> var_1767 = const()[name = tensor<string, []>("op_1767"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1769 = const()[name = tensor<string, []>("op_1769"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_pad_0 = const()[name = tensor<string, []>("key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13169472)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_cast_fp16 = conv(dilations = var_1769, groups = var_1728, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_1767, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("key_cast_fp16")];
+            tensor<int32, [2]> var_1774 = const()[name = tensor<string, []>("op_1774"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1776 = const()[name = tensor<string, []>("op_1776"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_pad_type_0 = const()[name = tensor<string, []>("value_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_pad_0 = const()[name = tensor<string, []>("value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13464448)))];
+            tensor<fp16, [384]> layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13759424)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_1776, groups = var_1728, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_1774, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("value_cast_fp16")];
+            tensor<int32, [4]> var_1783_begin_0 = const()[name = tensor<string, []>("op_1783_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1783_end_0 = const()[name = tensor<string, []>("op_1783_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1783_end_mask_0 = const()[name = tensor<string, []>("op_1783_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1783_cast_fp16 = slice_by_index(begin = var_1783_begin_0, end = var_1783_end_0, end_mask = var_1783_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1783_cast_fp16")];
+            tensor<int32, [4]> var_1787_begin_0 = const()[name = tensor<string, []>("op_1787_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_1787_end_0 = const()[name = tensor<string, []>("op_1787_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_1787_end_mask_0 = const()[name = tensor<string, []>("op_1787_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1787_cast_fp16 = slice_by_index(begin = var_1787_begin_0, end = var_1787_end_0, end_mask = var_1787_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1787_cast_fp16")];
+            tensor<int32, [4]> var_1791_begin_0 = const()[name = tensor<string, []>("op_1791_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_1791_end_0 = const()[name = tensor<string, []>("op_1791_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_1791_end_mask_0 = const()[name = tensor<string, []>("op_1791_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1791_cast_fp16 = slice_by_index(begin = var_1791_begin_0, end = var_1791_end_0, end_mask = var_1791_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1791_cast_fp16")];
+            tensor<int32, [4]> var_1795_begin_0 = const()[name = tensor<string, []>("op_1795_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_1795_end_0 = const()[name = tensor<string, []>("op_1795_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_1795_end_mask_0 = const()[name = tensor<string, []>("op_1795_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1795_cast_fp16 = slice_by_index(begin = var_1795_begin_0, end = var_1795_end_0, end_mask = var_1795_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1795_cast_fp16")];
+            tensor<int32, [4]> var_1799_begin_0 = const()[name = tensor<string, []>("op_1799_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_1799_end_0 = const()[name = tensor<string, []>("op_1799_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_1799_end_mask_0 = const()[name = tensor<string, []>("op_1799_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1799_cast_fp16 = slice_by_index(begin = var_1799_begin_0, end = var_1799_end_0, end_mask = var_1799_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1799_cast_fp16")];
+            tensor<int32, [4]> var_1803_begin_0 = const()[name = tensor<string, []>("op_1803_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_1803_end_0 = const()[name = tensor<string, []>("op_1803_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_1803_end_mask_0 = const()[name = tensor<string, []>("op_1803_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16 = slice_by_index(begin = var_1803_begin_0, end = var_1803_end_0, end_mask = var_1803_end_mask_0, x = query_cast_fp16)[name = tensor<string, []>("op_1803_cast_fp16")];
+            tensor<int32, [4]> var_1812_begin_0 = const()[name = tensor<string, []>("op_1812_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1812_end_0 = const()[name = tensor<string, []>("op_1812_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1812_end_mask_0 = const()[name = tensor<string, []>("op_1812_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1812_cast_fp16 = slice_by_index(begin = var_1812_begin_0, end = var_1812_end_0, end_mask = var_1812_end_mask_0, x = var_1783_cast_fp16)[name = tensor<string, []>("op_1812_cast_fp16")];
+            tensor<int32, [4]> var_1819_begin_0 = const()[name = tensor<string, []>("op_1819_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1819_end_0 = const()[name = tensor<string, []>("op_1819_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1819_end_mask_0 = const()[name = tensor<string, []>("op_1819_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1819_cast_fp16 = slice_by_index(begin = var_1819_begin_0, end = var_1819_end_0, end_mask = var_1819_end_mask_0, x = var_1783_cast_fp16)[name = tensor<string, []>("op_1819_cast_fp16")];
+            tensor<int32, [4]> var_1826_begin_0 = const()[name = tensor<string, []>("op_1826_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1826_end_0 = const()[name = tensor<string, []>("op_1826_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1826_end_mask_0 = const()[name = tensor<string, []>("op_1826_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1826_cast_fp16 = slice_by_index(begin = var_1826_begin_0, end = var_1826_end_0, end_mask = var_1826_end_mask_0, x = var_1783_cast_fp16)[name = tensor<string, []>("op_1826_cast_fp16")];
+            tensor<int32, [4]> var_1833_begin_0 = const()[name = tensor<string, []>("op_1833_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1833_end_0 = const()[name = tensor<string, []>("op_1833_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1833_end_mask_0 = const()[name = tensor<string, []>("op_1833_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1833_cast_fp16 = slice_by_index(begin = var_1833_begin_0, end = var_1833_end_0, end_mask = var_1833_end_mask_0, x = var_1783_cast_fp16)[name = tensor<string, []>("op_1833_cast_fp16")];
+            tensor<int32, [4]> var_1840_begin_0 = const()[name = tensor<string, []>("op_1840_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1840_end_0 = const()[name = tensor<string, []>("op_1840_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1840_end_mask_0 = const()[name = tensor<string, []>("op_1840_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1840_cast_fp16 = slice_by_index(begin = var_1840_begin_0, end = var_1840_end_0, end_mask = var_1840_end_mask_0, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1840_cast_fp16")];
+            tensor<int32, [4]> var_1847_begin_0 = const()[name = tensor<string, []>("op_1847_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1847_end_0 = const()[name = tensor<string, []>("op_1847_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1847_end_mask_0 = const()[name = tensor<string, []>("op_1847_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1847_cast_fp16 = slice_by_index(begin = var_1847_begin_0, end = var_1847_end_0, end_mask = var_1847_end_mask_0, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1847_cast_fp16")];
+            tensor<int32, [4]> var_1854_begin_0 = const()[name = tensor<string, []>("op_1854_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1854_end_0 = const()[name = tensor<string, []>("op_1854_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1854_end_mask_0 = const()[name = tensor<string, []>("op_1854_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1854_cast_fp16 = slice_by_index(begin = var_1854_begin_0, end = var_1854_end_0, end_mask = var_1854_end_mask_0, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1854_cast_fp16")];
+            tensor<int32, [4]> var_1861_begin_0 = const()[name = tensor<string, []>("op_1861_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1861_end_0 = const()[name = tensor<string, []>("op_1861_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1861_end_mask_0 = const()[name = tensor<string, []>("op_1861_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1861_cast_fp16 = slice_by_index(begin = var_1861_begin_0, end = var_1861_end_0, end_mask = var_1861_end_mask_0, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1861_cast_fp16")];
+            tensor<int32, [4]> var_1868_begin_0 = const()[name = tensor<string, []>("op_1868_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1868_end_0 = const()[name = tensor<string, []>("op_1868_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1868_end_mask_0 = const()[name = tensor<string, []>("op_1868_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1868_cast_fp16 = slice_by_index(begin = var_1868_begin_0, end = var_1868_end_0, end_mask = var_1868_end_mask_0, x = var_1791_cast_fp16)[name = tensor<string, []>("op_1868_cast_fp16")];
+            tensor<int32, [4]> var_1875_begin_0 = const()[name = tensor<string, []>("op_1875_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1875_end_0 = const()[name = tensor<string, []>("op_1875_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1875_end_mask_0 = const()[name = tensor<string, []>("op_1875_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1875_cast_fp16 = slice_by_index(begin = var_1875_begin_0, end = var_1875_end_0, end_mask = var_1875_end_mask_0, x = var_1791_cast_fp16)[name = tensor<string, []>("op_1875_cast_fp16")];
+            tensor<int32, [4]> var_1882_begin_0 = const()[name = tensor<string, []>("op_1882_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1882_end_0 = const()[name = tensor<string, []>("op_1882_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1882_end_mask_0 = const()[name = tensor<string, []>("op_1882_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1882_cast_fp16 = slice_by_index(begin = var_1882_begin_0, end = var_1882_end_0, end_mask = var_1882_end_mask_0, x = var_1791_cast_fp16)[name = tensor<string, []>("op_1882_cast_fp16")];
+            tensor<int32, [4]> var_1889_begin_0 = const()[name = tensor<string, []>("op_1889_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1889_end_0 = const()[name = tensor<string, []>("op_1889_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1889_end_mask_0 = const()[name = tensor<string, []>("op_1889_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1889_cast_fp16 = slice_by_index(begin = var_1889_begin_0, end = var_1889_end_0, end_mask = var_1889_end_mask_0, x = var_1791_cast_fp16)[name = tensor<string, []>("op_1889_cast_fp16")];
+            tensor<int32, [4]> var_1896_begin_0 = const()[name = tensor<string, []>("op_1896_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1896_end_0 = const()[name = tensor<string, []>("op_1896_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1896_end_mask_0 = const()[name = tensor<string, []>("op_1896_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1896_cast_fp16 = slice_by_index(begin = var_1896_begin_0, end = var_1896_end_0, end_mask = var_1896_end_mask_0, x = var_1795_cast_fp16)[name = tensor<string, []>("op_1896_cast_fp16")];
+            tensor<int32, [4]> var_1903_begin_0 = const()[name = tensor<string, []>("op_1903_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1903_end_0 = const()[name = tensor<string, []>("op_1903_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1903_end_mask_0 = const()[name = tensor<string, []>("op_1903_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1903_cast_fp16 = slice_by_index(begin = var_1903_begin_0, end = var_1903_end_0, end_mask = var_1903_end_mask_0, x = var_1795_cast_fp16)[name = tensor<string, []>("op_1903_cast_fp16")];
+            tensor<int32, [4]> var_1910_begin_0 = const()[name = tensor<string, []>("op_1910_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1910_end_0 = const()[name = tensor<string, []>("op_1910_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1910_end_mask_0 = const()[name = tensor<string, []>("op_1910_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1910_cast_fp16 = slice_by_index(begin = var_1910_begin_0, end = var_1910_end_0, end_mask = var_1910_end_mask_0, x = var_1795_cast_fp16)[name = tensor<string, []>("op_1910_cast_fp16")];
+            tensor<int32, [4]> var_1917_begin_0 = const()[name = tensor<string, []>("op_1917_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1917_end_0 = const()[name = tensor<string, []>("op_1917_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1917_end_mask_0 = const()[name = tensor<string, []>("op_1917_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1917_cast_fp16 = slice_by_index(begin = var_1917_begin_0, end = var_1917_end_0, end_mask = var_1917_end_mask_0, x = var_1795_cast_fp16)[name = tensor<string, []>("op_1917_cast_fp16")];
+            tensor<int32, [4]> var_1924_begin_0 = const()[name = tensor<string, []>("op_1924_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1924_end_0 = const()[name = tensor<string, []>("op_1924_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1924_end_mask_0 = const()[name = tensor<string, []>("op_1924_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = var_1924_end_0, end_mask = var_1924_end_mask_0, x = var_1799_cast_fp16)[name = tensor<string, []>("op_1924_cast_fp16")];
+            tensor<int32, [4]> var_1931_begin_0 = const()[name = tensor<string, []>("op_1931_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1931_end_0 = const()[name = tensor<string, []>("op_1931_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1931_end_mask_0 = const()[name = tensor<string, []>("op_1931_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1931_cast_fp16 = slice_by_index(begin = var_1931_begin_0, end = var_1931_end_0, end_mask = var_1931_end_mask_0, x = var_1799_cast_fp16)[name = tensor<string, []>("op_1931_cast_fp16")];
+            tensor<int32, [4]> var_1938_begin_0 = const()[name = tensor<string, []>("op_1938_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1938_end_0 = const()[name = tensor<string, []>("op_1938_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1938_end_mask_0 = const()[name = tensor<string, []>("op_1938_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1938_cast_fp16 = slice_by_index(begin = var_1938_begin_0, end = var_1938_end_0, end_mask = var_1938_end_mask_0, x = var_1799_cast_fp16)[name = tensor<string, []>("op_1938_cast_fp16")];
+            tensor<int32, [4]> var_1945_begin_0 = const()[name = tensor<string, []>("op_1945_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1945_end_0 = const()[name = tensor<string, []>("op_1945_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1945_end_mask_0 = const()[name = tensor<string, []>("op_1945_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1945_cast_fp16 = slice_by_index(begin = var_1945_begin_0, end = var_1945_end_0, end_mask = var_1945_end_mask_0, x = var_1799_cast_fp16)[name = tensor<string, []>("op_1945_cast_fp16")];
+            tensor<int32, [4]> var_1952_begin_0 = const()[name = tensor<string, []>("op_1952_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1952_end_0 = const()[name = tensor<string, []>("op_1952_end_0"), val = tensor<int32, [4]>([1, 64, 1, 375])];
+            tensor<bool, [4]> var_1952_end_mask_0 = const()[name = tensor<string, []>("op_1952_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1952_cast_fp16 = slice_by_index(begin = var_1952_begin_0, end = var_1952_end_0, end_mask = var_1952_end_mask_0, x = var_1803_cast_fp16)[name = tensor<string, []>("op_1952_cast_fp16")];
+            tensor<int32, [4]> var_1959_begin_0 = const()[name = tensor<string, []>("op_1959_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 375])];
+            tensor<int32, [4]> var_1959_end_0 = const()[name = tensor<string, []>("op_1959_end_0"), val = tensor<int32, [4]>([1, 64, 1, 750])];
+            tensor<bool, [4]> var_1959_end_mask_0 = const()[name = tensor<string, []>("op_1959_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1959_cast_fp16 = slice_by_index(begin = var_1959_begin_0, end = var_1959_end_0, end_mask = var_1959_end_mask_0, x = var_1803_cast_fp16)[name = tensor<string, []>("op_1959_cast_fp16")];
+            tensor<int32, [4]> var_1966_begin_0 = const()[name = tensor<string, []>("op_1966_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 750])];
+            tensor<int32, [4]> var_1966_end_0 = const()[name = tensor<string, []>("op_1966_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1125])];
+            tensor<bool, [4]> var_1966_end_mask_0 = const()[name = tensor<string, []>("op_1966_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1966_cast_fp16 = slice_by_index(begin = var_1966_begin_0, end = var_1966_end_0, end_mask = var_1966_end_mask_0, x = var_1803_cast_fp16)[name = tensor<string, []>("op_1966_cast_fp16")];
+            tensor<int32, [4]> var_1973_begin_0 = const()[name = tensor<string, []>("op_1973_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 1125])];
+            tensor<int32, [4]> var_1973_end_0 = const()[name = tensor<string, []>("op_1973_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_1973_end_mask_0 = const()[name = tensor<string, []>("op_1973_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 64, 1, 375]> var_1973_cast_fp16 = slice_by_index(begin = var_1973_begin_0, end = var_1973_end_0, end_mask = var_1973_end_mask_0, x = var_1803_cast_fp16)[name = tensor<string, []>("op_1973_cast_fp16")];
+            tensor<int32, [4]> k_perm_0 = const()[name = tensor<string, []>("k_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [4]> var_1978_begin_0 = const()[name = tensor<string, []>("op_1978_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1978_end_0 = const()[name = tensor<string, []>("op_1978_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 64])];
+            tensor<bool, [4]> var_1978_end_mask_0 = const()[name = tensor<string, []>("op_1978_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 384]> transpose_0 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1978_cast_fp16 = slice_by_index(begin = var_1978_begin_0, end = var_1978_end_0, end_mask = var_1978_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_1978_cast_fp16")];
+            tensor<int32, [4]> var_1982_begin_0 = const()[name = tensor<string, []>("op_1982_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
+            tensor<int32, [4]> var_1982_end_0 = const()[name = tensor<string, []>("op_1982_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 128])];
+            tensor<bool, [4]> var_1982_end_mask_0 = const()[name = tensor<string, []>("op_1982_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1982_cast_fp16 = slice_by_index(begin = var_1982_begin_0, end = var_1982_end_0, end_mask = var_1982_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_1982_cast_fp16")];
+            tensor<int32, [4]> var_1986_begin_0 = const()[name = tensor<string, []>("op_1986_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 128])];
+            tensor<int32, [4]> var_1986_end_0 = const()[name = tensor<string, []>("op_1986_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 192])];
+            tensor<bool, [4]> var_1986_end_mask_0 = const()[name = tensor<string, []>("op_1986_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1986_cast_fp16 = slice_by_index(begin = var_1986_begin_0, end = var_1986_end_0, end_mask = var_1986_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_1986_cast_fp16")];
+            tensor<int32, [4]> var_1990_begin_0 = const()[name = tensor<string, []>("op_1990_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 192])];
+            tensor<int32, [4]> var_1990_end_0 = const()[name = tensor<string, []>("op_1990_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 256])];
+            tensor<bool, [4]> var_1990_end_mask_0 = const()[name = tensor<string, []>("op_1990_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1990_cast_fp16 = slice_by_index(begin = var_1990_begin_0, end = var_1990_end_0, end_mask = var_1990_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_1990_cast_fp16")];
+            tensor<int32, [4]> var_1994_begin_0 = const()[name = tensor<string, []>("op_1994_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 256])];
+            tensor<int32, [4]> var_1994_end_0 = const()[name = tensor<string, []>("op_1994_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 320])];
+            tensor<bool, [4]> var_1994_end_mask_0 = const()[name = tensor<string, []>("op_1994_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1994_cast_fp16 = slice_by_index(begin = var_1994_begin_0, end = var_1994_end_0, end_mask = var_1994_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_1994_cast_fp16")];
+            tensor<int32, [4]> var_1998_begin_0 = const()[name = tensor<string, []>("op_1998_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 320])];
+            tensor<int32, [4]> var_1998_end_0 = const()[name = tensor<string, []>("op_1998_end_0"), val = tensor<int32, [4]>([1, 1500, 1, 384])];
+            tensor<bool, [4]> var_1998_end_mask_0 = const()[name = tensor<string, []>("op_1998_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
+            tensor<fp16, [1, 1500, 1, 64]> var_1998_cast_fp16 = slice_by_index(begin = var_1998_begin_0, end = var_1998_end_0, end_mask = var_1998_end_mask_0, x = transpose_0)[name = tensor<string, []>("op_1998_cast_fp16")];
+            tensor<int32, [4]> var_2000_begin_0 = const()[name = tensor<string, []>("op_2000_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_2000_end_0 = const()[name = tensor<string, []>("op_2000_end_0"), val = tensor<int32, [4]>([1, 64, 1, 1500])];
+            tensor<bool, [4]> var_2000_end_mask_0 = const()[name = tensor<string, []>("op_2000_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2000_cast_fp16 = slice_by_index(begin = var_2000_begin_0, end = var_2000_end_0, end_mask = var_2000_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2000_cast_fp16")];
+            tensor<int32, [4]> var_2004_begin_0 = const()[name = tensor<string, []>("op_2004_begin_0"), val = tensor<int32, [4]>([0, 64, 0, 0])];
+            tensor<int32, [4]> var_2004_end_0 = const()[name = tensor<string, []>("op_2004_end_0"), val = tensor<int32, [4]>([1, 128, 1, 1500])];
+            tensor<bool, [4]> var_2004_end_mask_0 = const()[name = tensor<string, []>("op_2004_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2004_cast_fp16")];
+            tensor<int32, [4]> var_2008_begin_0 = const()[name = tensor<string, []>("op_2008_begin_0"), val = tensor<int32, [4]>([0, 128, 0, 0])];
+            tensor<int32, [4]> var_2008_end_0 = const()[name = tensor<string, []>("op_2008_end_0"), val = tensor<int32, [4]>([1, 192, 1, 1500])];
+            tensor<bool, [4]> var_2008_end_mask_0 = const()[name = tensor<string, []>("op_2008_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16 = slice_by_index(begin = var_2008_begin_0, end = var_2008_end_0, end_mask = var_2008_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2008_cast_fp16")];
+            tensor<int32, [4]> var_2012_begin_0 = const()[name = tensor<string, []>("op_2012_begin_0"), val = tensor<int32, [4]>([0, 192, 0, 0])];
+            tensor<int32, [4]> var_2012_end_0 = const()[name = tensor<string, []>("op_2012_end_0"), val = tensor<int32, [4]>([1, 256, 1, 1500])];
+            tensor<bool, [4]> var_2012_end_mask_0 = const()[name = tensor<string, []>("op_2012_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2012_cast_fp16 = slice_by_index(begin = var_2012_begin_0, end = var_2012_end_0, end_mask = var_2012_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2012_cast_fp16")];
+            tensor<int32, [4]> var_2016_begin_0 = const()[name = tensor<string, []>("op_2016_begin_0"), val = tensor<int32, [4]>([0, 256, 0, 0])];
+            tensor<int32, [4]> var_2016_end_0 = const()[name = tensor<string, []>("op_2016_end_0"), val = tensor<int32, [4]>([1, 320, 1, 1500])];
+            tensor<bool, [4]> var_2016_end_mask_0 = const()[name = tensor<string, []>("op_2016_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2016_cast_fp16 = slice_by_index(begin = var_2016_begin_0, end = var_2016_end_0, end_mask = var_2016_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2016_cast_fp16")];
+            tensor<int32, [4]> var_2020_begin_0 = const()[name = tensor<string, []>("op_2020_begin_0"), val = tensor<int32, [4]>([0, 320, 0, 0])];
+            tensor<int32, [4]> var_2020_end_0 = const()[name = tensor<string, []>("op_2020_end_0"), val = tensor<int32, [4]>([1, 384, 1, 1500])];
+            tensor<bool, [4]> var_2020_end_mask_0 = const()[name = tensor<string, []>("op_2020_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 64, 1, 1500]> var_2020_cast_fp16 = slice_by_index(begin = var_2020_begin_0, end = var_2020_end_0, end_mask = var_2020_end_mask_0, x = value_cast_fp16)[name = tensor<string, []>("op_2020_cast_fp16")];
+            tensor<string, []> var_2024_equation_0 = const()[name = tensor<string, []>("op_2024_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2024_cast_fp16 = einsum(equation = var_2024_equation_0, values = (var_1978_cast_fp16, var_1812_cast_fp16))[name = tensor<string, []>("op_2024_cast_fp16")];
+            tensor<fp16, []> var_2025_to_fp16 = const()[name = tensor<string, []>("op_2025_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_145_cast_fp16 = mul(x = var_2024_cast_fp16, y = var_2025_to_fp16)[name = tensor<string, []>("aw_chunk_145_cast_fp16")];
+            tensor<string, []> var_2028_equation_0 = const()[name = tensor<string, []>("op_2028_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2028_cast_fp16 = einsum(equation = var_2028_equation_0, values = (var_1978_cast_fp16, var_1819_cast_fp16))[name = tensor<string, []>("op_2028_cast_fp16")];
+            tensor<fp16, []> var_2029_to_fp16 = const()[name = tensor<string, []>("op_2029_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_147_cast_fp16 = mul(x = var_2028_cast_fp16, y = var_2029_to_fp16)[name = tensor<string, []>("aw_chunk_147_cast_fp16")];
+            tensor<string, []> var_2032_equation_0 = const()[name = tensor<string, []>("op_2032_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2032_cast_fp16 = einsum(equation = var_2032_equation_0, values = (var_1978_cast_fp16, var_1826_cast_fp16))[name = tensor<string, []>("op_2032_cast_fp16")];
+            tensor<fp16, []> var_2033_to_fp16 = const()[name = tensor<string, []>("op_2033_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_149_cast_fp16 = mul(x = var_2032_cast_fp16, y = var_2033_to_fp16)[name = tensor<string, []>("aw_chunk_149_cast_fp16")];
+            tensor<string, []> var_2036_equation_0 = const()[name = tensor<string, []>("op_2036_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2036_cast_fp16 = einsum(equation = var_2036_equation_0, values = (var_1978_cast_fp16, var_1833_cast_fp16))[name = tensor<string, []>("op_2036_cast_fp16")];
+            tensor<fp16, []> var_2037_to_fp16 = const()[name = tensor<string, []>("op_2037_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_151_cast_fp16 = mul(x = var_2036_cast_fp16, y = var_2037_to_fp16)[name = tensor<string, []>("aw_chunk_151_cast_fp16")];
+            tensor<string, []> var_2040_equation_0 = const()[name = tensor<string, []>("op_2040_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2040_cast_fp16 = einsum(equation = var_2040_equation_0, values = (var_1982_cast_fp16, var_1840_cast_fp16))[name = tensor<string, []>("op_2040_cast_fp16")];
+            tensor<fp16, []> var_2041_to_fp16 = const()[name = tensor<string, []>("op_2041_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_153_cast_fp16 = mul(x = var_2040_cast_fp16, y = var_2041_to_fp16)[name = tensor<string, []>("aw_chunk_153_cast_fp16")];
+            tensor<string, []> var_2044_equation_0 = const()[name = tensor<string, []>("op_2044_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2044_cast_fp16 = einsum(equation = var_2044_equation_0, values = (var_1982_cast_fp16, var_1847_cast_fp16))[name = tensor<string, []>("op_2044_cast_fp16")];
+            tensor<fp16, []> var_2045_to_fp16 = const()[name = tensor<string, []>("op_2045_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_155_cast_fp16 = mul(x = var_2044_cast_fp16, y = var_2045_to_fp16)[name = tensor<string, []>("aw_chunk_155_cast_fp16")];
+            tensor<string, []> var_2048_equation_0 = const()[name = tensor<string, []>("op_2048_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2048_cast_fp16 = einsum(equation = var_2048_equation_0, values = (var_1982_cast_fp16, var_1854_cast_fp16))[name = tensor<string, []>("op_2048_cast_fp16")];
+            tensor<fp16, []> var_2049_to_fp16 = const()[name = tensor<string, []>("op_2049_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_157_cast_fp16 = mul(x = var_2048_cast_fp16, y = var_2049_to_fp16)[name = tensor<string, []>("aw_chunk_157_cast_fp16")];
+            tensor<string, []> var_2052_equation_0 = const()[name = tensor<string, []>("op_2052_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2052_cast_fp16 = einsum(equation = var_2052_equation_0, values = (var_1982_cast_fp16, var_1861_cast_fp16))[name = tensor<string, []>("op_2052_cast_fp16")];
+            tensor<fp16, []> var_2053_to_fp16 = const()[name = tensor<string, []>("op_2053_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_159_cast_fp16 = mul(x = var_2052_cast_fp16, y = var_2053_to_fp16)[name = tensor<string, []>("aw_chunk_159_cast_fp16")];
+            tensor<string, []> var_2056_equation_0 = const()[name = tensor<string, []>("op_2056_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2056_cast_fp16 = einsum(equation = var_2056_equation_0, values = (var_1986_cast_fp16, var_1868_cast_fp16))[name = tensor<string, []>("op_2056_cast_fp16")];
+            tensor<fp16, []> var_2057_to_fp16 = const()[name = tensor<string, []>("op_2057_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_161_cast_fp16 = mul(x = var_2056_cast_fp16, y = var_2057_to_fp16)[name = tensor<string, []>("aw_chunk_161_cast_fp16")];
+            tensor<string, []> var_2060_equation_0 = const()[name = tensor<string, []>("op_2060_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2060_cast_fp16 = einsum(equation = var_2060_equation_0, values = (var_1986_cast_fp16, var_1875_cast_fp16))[name = tensor<string, []>("op_2060_cast_fp16")];
+            tensor<fp16, []> var_2061_to_fp16 = const()[name = tensor<string, []>("op_2061_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_163_cast_fp16 = mul(x = var_2060_cast_fp16, y = var_2061_to_fp16)[name = tensor<string, []>("aw_chunk_163_cast_fp16")];
+            tensor<string, []> var_2064_equation_0 = const()[name = tensor<string, []>("op_2064_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2064_cast_fp16 = einsum(equation = var_2064_equation_0, values = (var_1986_cast_fp16, var_1882_cast_fp16))[name = tensor<string, []>("op_2064_cast_fp16")];
+            tensor<fp16, []> var_2065_to_fp16 = const()[name = tensor<string, []>("op_2065_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_165_cast_fp16 = mul(x = var_2064_cast_fp16, y = var_2065_to_fp16)[name = tensor<string, []>("aw_chunk_165_cast_fp16")];
+            tensor<string, []> var_2068_equation_0 = const()[name = tensor<string, []>("op_2068_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2068_cast_fp16 = einsum(equation = var_2068_equation_0, values = (var_1986_cast_fp16, var_1889_cast_fp16))[name = tensor<string, []>("op_2068_cast_fp16")];
+            tensor<fp16, []> var_2069_to_fp16 = const()[name = tensor<string, []>("op_2069_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_167_cast_fp16 = mul(x = var_2068_cast_fp16, y = var_2069_to_fp16)[name = tensor<string, []>("aw_chunk_167_cast_fp16")];
+            tensor<string, []> var_2072_equation_0 = const()[name = tensor<string, []>("op_2072_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2072_cast_fp16 = einsum(equation = var_2072_equation_0, values = (var_1990_cast_fp16, var_1896_cast_fp16))[name = tensor<string, []>("op_2072_cast_fp16")];
+            tensor<fp16, []> var_2073_to_fp16 = const()[name = tensor<string, []>("op_2073_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_169_cast_fp16 = mul(x = var_2072_cast_fp16, y = var_2073_to_fp16)[name = tensor<string, []>("aw_chunk_169_cast_fp16")];
+            tensor<string, []> var_2076_equation_0 = const()[name = tensor<string, []>("op_2076_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2076_cast_fp16 = einsum(equation = var_2076_equation_0, values = (var_1990_cast_fp16, var_1903_cast_fp16))[name = tensor<string, []>("op_2076_cast_fp16")];
+            tensor<fp16, []> var_2077_to_fp16 = const()[name = tensor<string, []>("op_2077_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_171_cast_fp16 = mul(x = var_2076_cast_fp16, y = var_2077_to_fp16)[name = tensor<string, []>("aw_chunk_171_cast_fp16")];
+            tensor<string, []> var_2080_equation_0 = const()[name = tensor<string, []>("op_2080_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2080_cast_fp16 = einsum(equation = var_2080_equation_0, values = (var_1990_cast_fp16, var_1910_cast_fp16))[name = tensor<string, []>("op_2080_cast_fp16")];
+            tensor<fp16, []> var_2081_to_fp16 = const()[name = tensor<string, []>("op_2081_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_173_cast_fp16 = mul(x = var_2080_cast_fp16, y = var_2081_to_fp16)[name = tensor<string, []>("aw_chunk_173_cast_fp16")];
+            tensor<string, []> var_2084_equation_0 = const()[name = tensor<string, []>("op_2084_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2084_cast_fp16 = einsum(equation = var_2084_equation_0, values = (var_1990_cast_fp16, var_1917_cast_fp16))[name = tensor<string, []>("op_2084_cast_fp16")];
+            tensor<fp16, []> var_2085_to_fp16 = const()[name = tensor<string, []>("op_2085_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_175_cast_fp16 = mul(x = var_2084_cast_fp16, y = var_2085_to_fp16)[name = tensor<string, []>("aw_chunk_175_cast_fp16")];
+            tensor<string, []> var_2088_equation_0 = const()[name = tensor<string, []>("op_2088_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2088_cast_fp16 = einsum(equation = var_2088_equation_0, values = (var_1994_cast_fp16, var_1924_cast_fp16))[name = tensor<string, []>("op_2088_cast_fp16")];
+            tensor<fp16, []> var_2089_to_fp16 = const()[name = tensor<string, []>("op_2089_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_177_cast_fp16 = mul(x = var_2088_cast_fp16, y = var_2089_to_fp16)[name = tensor<string, []>("aw_chunk_177_cast_fp16")];
+            tensor<string, []> var_2092_equation_0 = const()[name = tensor<string, []>("op_2092_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2092_cast_fp16 = einsum(equation = var_2092_equation_0, values = (var_1994_cast_fp16, var_1931_cast_fp16))[name = tensor<string, []>("op_2092_cast_fp16")];
+            tensor<fp16, []> var_2093_to_fp16 = const()[name = tensor<string, []>("op_2093_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_179_cast_fp16 = mul(x = var_2092_cast_fp16, y = var_2093_to_fp16)[name = tensor<string, []>("aw_chunk_179_cast_fp16")];
+            tensor<string, []> var_2096_equation_0 = const()[name = tensor<string, []>("op_2096_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2096_cast_fp16 = einsum(equation = var_2096_equation_0, values = (var_1994_cast_fp16, var_1938_cast_fp16))[name = tensor<string, []>("op_2096_cast_fp16")];
+            tensor<fp16, []> var_2097_to_fp16 = const()[name = tensor<string, []>("op_2097_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_181_cast_fp16 = mul(x = var_2096_cast_fp16, y = var_2097_to_fp16)[name = tensor<string, []>("aw_chunk_181_cast_fp16")];
+            tensor<string, []> var_2100_equation_0 = const()[name = tensor<string, []>("op_2100_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2100_cast_fp16 = einsum(equation = var_2100_equation_0, values = (var_1994_cast_fp16, var_1945_cast_fp16))[name = tensor<string, []>("op_2100_cast_fp16")];
+            tensor<fp16, []> var_2101_to_fp16 = const()[name = tensor<string, []>("op_2101_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_183_cast_fp16 = mul(x = var_2100_cast_fp16, y = var_2101_to_fp16)[name = tensor<string, []>("aw_chunk_183_cast_fp16")];
+            tensor<string, []> var_2104_equation_0 = const()[name = tensor<string, []>("op_2104_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2104_cast_fp16 = einsum(equation = var_2104_equation_0, values = (var_1998_cast_fp16, var_1952_cast_fp16))[name = tensor<string, []>("op_2104_cast_fp16")];
+            tensor<fp16, []> var_2105_to_fp16 = const()[name = tensor<string, []>("op_2105_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_185_cast_fp16 = mul(x = var_2104_cast_fp16, y = var_2105_to_fp16)[name = tensor<string, []>("aw_chunk_185_cast_fp16")];
+            tensor<string, []> var_2108_equation_0 = const()[name = tensor<string, []>("op_2108_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2108_cast_fp16 = einsum(equation = var_2108_equation_0, values = (var_1998_cast_fp16, var_1959_cast_fp16))[name = tensor<string, []>("op_2108_cast_fp16")];
+            tensor<fp16, []> var_2109_to_fp16 = const()[name = tensor<string, []>("op_2109_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_187_cast_fp16 = mul(x = var_2108_cast_fp16, y = var_2109_to_fp16)[name = tensor<string, []>("aw_chunk_187_cast_fp16")];
+            tensor<string, []> var_2112_equation_0 = const()[name = tensor<string, []>("op_2112_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2112_cast_fp16 = einsum(equation = var_2112_equation_0, values = (var_1998_cast_fp16, var_1966_cast_fp16))[name = tensor<string, []>("op_2112_cast_fp16")];
+            tensor<fp16, []> var_2113_to_fp16 = const()[name = tensor<string, []>("op_2113_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_189_cast_fp16 = mul(x = var_2112_cast_fp16, y = var_2113_to_fp16)[name = tensor<string, []>("aw_chunk_189_cast_fp16")];
+            tensor<string, []> var_2116_equation_0 = const()[name = tensor<string, []>("op_2116_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2116_cast_fp16 = einsum(equation = var_2116_equation_0, values = (var_1998_cast_fp16, var_1973_cast_fp16))[name = tensor<string, []>("op_2116_cast_fp16")];
+            tensor<fp16, []> var_2117_to_fp16 = const()[name = tensor<string, []>("op_2117_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 1500, 1, 375]> aw_chunk_cast_fp16 = mul(x = var_2116_cast_fp16, y = var_2117_to_fp16)[name = tensor<string, []>("aw_chunk_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2119_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_145_cast_fp16)[name = tensor<string, []>("op_2119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2120_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_147_cast_fp16)[name = tensor<string, []>("op_2120_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2121_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_149_cast_fp16)[name = tensor<string, []>("op_2121_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2122_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_151_cast_fp16)[name = tensor<string, []>("op_2122_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2123_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_153_cast_fp16)[name = tensor<string, []>("op_2123_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2124_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_155_cast_fp16)[name = tensor<string, []>("op_2124_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2125_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_157_cast_fp16)[name = tensor<string, []>("op_2125_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2126_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_159_cast_fp16)[name = tensor<string, []>("op_2126_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2127_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_161_cast_fp16)[name = tensor<string, []>("op_2127_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2128_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_163_cast_fp16)[name = tensor<string, []>("op_2128_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2129_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_165_cast_fp16)[name = tensor<string, []>("op_2129_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2130_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_167_cast_fp16)[name = tensor<string, []>("op_2130_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2131_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_169_cast_fp16)[name = tensor<string, []>("op_2131_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2132_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_171_cast_fp16)[name = tensor<string, []>("op_2132_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2133_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_173_cast_fp16)[name = tensor<string, []>("op_2133_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2134_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_175_cast_fp16)[name = tensor<string, []>("op_2134_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2135_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_177_cast_fp16)[name = tensor<string, []>("op_2135_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2136_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_179_cast_fp16)[name = tensor<string, []>("op_2136_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2137_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_181_cast_fp16)[name = tensor<string, []>("op_2137_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2138_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_183_cast_fp16)[name = tensor<string, []>("op_2138_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2139_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_185_cast_fp16)[name = tensor<string, []>("op_2139_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2140_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_187_cast_fp16)[name = tensor<string, []>("op_2140_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2141_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_189_cast_fp16)[name = tensor<string, []>("op_2141_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 375]> var_2142_cast_fp16 = softmax(axis = var_1728, x = aw_chunk_cast_fp16)[name = tensor<string, []>("op_2142_cast_fp16")];
+            tensor<string, []> var_2144_equation_0 = const()[name = tensor<string, []>("op_2144_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2144_cast_fp16 = einsum(equation = var_2144_equation_0, values = (var_2000_cast_fp16, var_2119_cast_fp16))[name = tensor<string, []>("op_2144_cast_fp16")];
+            tensor<string, []> var_2146_equation_0 = const()[name = tensor<string, []>("op_2146_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2146_cast_fp16 = einsum(equation = var_2146_equation_0, values = (var_2000_cast_fp16, var_2120_cast_fp16))[name = tensor<string, []>("op_2146_cast_fp16")];
+            tensor<string, []> var_2148_equation_0 = const()[name = tensor<string, []>("op_2148_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2148_cast_fp16 = einsum(equation = var_2148_equation_0, values = (var_2000_cast_fp16, var_2121_cast_fp16))[name = tensor<string, []>("op_2148_cast_fp16")];
+            tensor<string, []> var_2150_equation_0 = const()[name = tensor<string, []>("op_2150_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2150_cast_fp16 = einsum(equation = var_2150_equation_0, values = (var_2000_cast_fp16, var_2122_cast_fp16))[name = tensor<string, []>("op_2150_cast_fp16")];
+            tensor<string, []> var_2152_equation_0 = const()[name = tensor<string, []>("op_2152_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2152_cast_fp16 = einsum(equation = var_2152_equation_0, values = (var_2004_cast_fp16, var_2123_cast_fp16))[name = tensor<string, []>("op_2152_cast_fp16")];
+            tensor<string, []> var_2154_equation_0 = const()[name = tensor<string, []>("op_2154_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2154_cast_fp16 = einsum(equation = var_2154_equation_0, values = (var_2004_cast_fp16, var_2124_cast_fp16))[name = tensor<string, []>("op_2154_cast_fp16")];
+            tensor<string, []> var_2156_equation_0 = const()[name = tensor<string, []>("op_2156_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2156_cast_fp16 = einsum(equation = var_2156_equation_0, values = (var_2004_cast_fp16, var_2125_cast_fp16))[name = tensor<string, []>("op_2156_cast_fp16")];
+            tensor<string, []> var_2158_equation_0 = const()[name = tensor<string, []>("op_2158_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2158_cast_fp16 = einsum(equation = var_2158_equation_0, values = (var_2004_cast_fp16, var_2126_cast_fp16))[name = tensor<string, []>("op_2158_cast_fp16")];
+            tensor<string, []> var_2160_equation_0 = const()[name = tensor<string, []>("op_2160_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2160_cast_fp16 = einsum(equation = var_2160_equation_0, values = (var_2008_cast_fp16, var_2127_cast_fp16))[name = tensor<string, []>("op_2160_cast_fp16")];
+            tensor<string, []> var_2162_equation_0 = const()[name = tensor<string, []>("op_2162_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2162_cast_fp16 = einsum(equation = var_2162_equation_0, values = (var_2008_cast_fp16, var_2128_cast_fp16))[name = tensor<string, []>("op_2162_cast_fp16")];
+            tensor<string, []> var_2164_equation_0 = const()[name = tensor<string, []>("op_2164_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2164_cast_fp16 = einsum(equation = var_2164_equation_0, values = (var_2008_cast_fp16, var_2129_cast_fp16))[name = tensor<string, []>("op_2164_cast_fp16")];
+            tensor<string, []> var_2166_equation_0 = const()[name = tensor<string, []>("op_2166_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2166_cast_fp16 = einsum(equation = var_2166_equation_0, values = (var_2008_cast_fp16, var_2130_cast_fp16))[name = tensor<string, []>("op_2166_cast_fp16")];
+            tensor<string, []> var_2168_equation_0 = const()[name = tensor<string, []>("op_2168_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2168_cast_fp16 = einsum(equation = var_2168_equation_0, values = (var_2012_cast_fp16, var_2131_cast_fp16))[name = tensor<string, []>("op_2168_cast_fp16")];
+            tensor<string, []> var_2170_equation_0 = const()[name = tensor<string, []>("op_2170_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2170_cast_fp16 = einsum(equation = var_2170_equation_0, values = (var_2012_cast_fp16, var_2132_cast_fp16))[name = tensor<string, []>("op_2170_cast_fp16")];
+            tensor<string, []> var_2172_equation_0 = const()[name = tensor<string, []>("op_2172_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2172_cast_fp16 = einsum(equation = var_2172_equation_0, values = (var_2012_cast_fp16, var_2133_cast_fp16))[name = tensor<string, []>("op_2172_cast_fp16")];
+            tensor<string, []> var_2174_equation_0 = const()[name = tensor<string, []>("op_2174_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2174_cast_fp16 = einsum(equation = var_2174_equation_0, values = (var_2012_cast_fp16, var_2134_cast_fp16))[name = tensor<string, []>("op_2174_cast_fp16")];
+            tensor<string, []> var_2176_equation_0 = const()[name = tensor<string, []>("op_2176_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2176_cast_fp16 = einsum(equation = var_2176_equation_0, values = (var_2016_cast_fp16, var_2135_cast_fp16))[name = tensor<string, []>("op_2176_cast_fp16")];
+            tensor<string, []> var_2178_equation_0 = const()[name = tensor<string, []>("op_2178_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2178_cast_fp16 = einsum(equation = var_2178_equation_0, values = (var_2016_cast_fp16, var_2136_cast_fp16))[name = tensor<string, []>("op_2178_cast_fp16")];
+            tensor<string, []> var_2180_equation_0 = const()[name = tensor<string, []>("op_2180_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2180_cast_fp16 = einsum(equation = var_2180_equation_0, values = (var_2016_cast_fp16, var_2137_cast_fp16))[name = tensor<string, []>("op_2180_cast_fp16")];
+            tensor<string, []> var_2182_equation_0 = const()[name = tensor<string, []>("op_2182_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2182_cast_fp16 = einsum(equation = var_2182_equation_0, values = (var_2016_cast_fp16, var_2138_cast_fp16))[name = tensor<string, []>("op_2182_cast_fp16")];
+            tensor<string, []> var_2184_equation_0 = const()[name = tensor<string, []>("op_2184_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2184_cast_fp16 = einsum(equation = var_2184_equation_0, values = (var_2020_cast_fp16, var_2139_cast_fp16))[name = tensor<string, []>("op_2184_cast_fp16")];
+            tensor<string, []> var_2186_equation_0 = const()[name = tensor<string, []>("op_2186_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2186_cast_fp16 = einsum(equation = var_2186_equation_0, values = (var_2020_cast_fp16, var_2140_cast_fp16))[name = tensor<string, []>("op_2186_cast_fp16")];
+            tensor<string, []> var_2188_equation_0 = const()[name = tensor<string, []>("op_2188_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2188_cast_fp16 = einsum(equation = var_2188_equation_0, values = (var_2020_cast_fp16, var_2141_cast_fp16))[name = tensor<string, []>("op_2188_cast_fp16")];
+            tensor<string, []> var_2190_equation_0 = const()[name = tensor<string, []>("op_2190_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 375]> var_2190_cast_fp16 = einsum(equation = var_2190_equation_0, values = (var_2020_cast_fp16, var_2142_cast_fp16))[name = tensor<string, []>("op_2190_cast_fp16")];
+            tensor<bool, []> var_2192_interleave_0 = const()[name = tensor<string, []>("op_2192_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2192_cast_fp16 = concat(axis = var_1717, interleave = var_2192_interleave_0, values = (var_2144_cast_fp16, var_2146_cast_fp16, var_2148_cast_fp16, var_2150_cast_fp16))[name = tensor<string, []>("op_2192_cast_fp16")];
+            tensor<bool, []> var_2194_interleave_0 = const()[name = tensor<string, []>("op_2194_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2194_cast_fp16 = concat(axis = var_1717, interleave = var_2194_interleave_0, values = (var_2152_cast_fp16, var_2154_cast_fp16, var_2156_cast_fp16, var_2158_cast_fp16))[name = tensor<string, []>("op_2194_cast_fp16")];
+            tensor<bool, []> var_2196_interleave_0 = const()[name = tensor<string, []>("op_2196_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2196_cast_fp16 = concat(axis = var_1717, interleave = var_2196_interleave_0, values = (var_2160_cast_fp16, var_2162_cast_fp16, var_2164_cast_fp16, var_2166_cast_fp16))[name = tensor<string, []>("op_2196_cast_fp16")];
+            tensor<bool, []> var_2198_interleave_0 = const()[name = tensor<string, []>("op_2198_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2198_cast_fp16 = concat(axis = var_1717, interleave = var_2198_interleave_0, values = (var_2168_cast_fp16, var_2170_cast_fp16, var_2172_cast_fp16, var_2174_cast_fp16))[name = tensor<string, []>("op_2198_cast_fp16")];
+            tensor<bool, []> var_2200_interleave_0 = const()[name = tensor<string, []>("op_2200_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2200_cast_fp16 = concat(axis = var_1717, interleave = var_2200_interleave_0, values = (var_2176_cast_fp16, var_2178_cast_fp16, var_2180_cast_fp16, var_2182_cast_fp16))[name = tensor<string, []>("op_2200_cast_fp16")];
+            tensor<bool, []> var_2202_interleave_0 = const()[name = tensor<string, []>("op_2202_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2202_cast_fp16 = concat(axis = var_1717, interleave = var_2202_interleave_0, values = (var_2184_cast_fp16, var_2186_cast_fp16, var_2188_cast_fp16, var_2190_cast_fp16))[name = tensor<string, []>("op_2202_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_25_cast_fp16 = concat(axis = var_1728, interleave = input_25_interleave_0, values = (var_2192_cast_fp16, var_2194_cast_fp16, var_2196_cast_fp16, var_2198_cast_fp16, var_2200_cast_fp16, var_2202_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<int32, [2]> var_2207 = const()[name = tensor<string, []>("op_2207"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2209 = const()[name = tensor<string, []>("op_2209"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_pad_type_0 = const()[name = tensor<string, []>("obj_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_pad_0 = const()[name = tensor<string, []>("obj_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13760256)))];
+            tensor<fp16, [384]> layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14055232)))];
+            tensor<fp16, [1, 384, 1, 1500]> obj_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_2209, groups = var_1728, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = var_2207, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("obj_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> var_2215 = const()[name = tensor<string, []>("op_2215"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_15_cast_fp16 = reduce_mean(axes = var_2215, keep_dims = var_1729, x = inputs_15_cast_fp16)[name = tensor<string, []>("channels_mean_15_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor<string, []>("zero_mean_15_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor<string, []>("zero_mean_sq_15_cast_fp16")];
+            tensor<int32, [1]> var_2219 = const()[name = tensor<string, []>("op_2219"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2220_cast_fp16 = reduce_mean(axes = var_2219, keep_dims = var_1729, x = zero_mean_sq_15_cast_fp16)[name = tensor<string, []>("op_2220_cast_fp16")];
+            tensor<fp16, []> var_2221_to_fp16 = const()[name = tensor<string, []>("op_2221_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_2222_cast_fp16 = add(x = var_2220_cast_fp16, y = var_2221_to_fp16)[name = tensor<string, []>("op_2222_cast_fp16")];
+            tensor<fp16, []> denom_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_2222_cast_fp16)[name = tensor<string, []>("denom_15_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = denom_15_cast_fp16)[name = tensor<string, []>("out_15_cast_fp16")];
+            tensor<fp16, [384]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14056064)))];
+            tensor<fp16, [384]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14056896)))];
+            tensor<fp16, []> input_27_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_27_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<int32, [2]> var_2233 = const()[name = tensor<string, []>("op_2233"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2235 = const()[name = tensor<string, []>("op_2235"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1536, 384, 1, 1]> layers_3_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14057728)))];
+            tensor<fp16, [1536]> layers_3_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15237440)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_2235, groups = var_1728, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = var_2233, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [2]> var_2241 = const()[name = tensor<string, []>("op_2241"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2243 = const()[name = tensor<string, []>("op_2243"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_pad_type_0 = const()[name = tensor<string, []>("hidden_states_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_pad_0 = const()[name = tensor<string, []>("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 1536, 1, 1]> layers_3_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15240576)))];
+            tensor<fp16, [384]> layers_3_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16420288)))];
+            tensor<fp16, [1, 384, 1, 1500]> hidden_states_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_2243, groups = var_1728, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = var_2241, weight = layers_3_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<bool, []> var_2249 = const()[name = tensor<string, []>("op_2249"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_2253 = const()[name = tensor<string, []>("op_2253"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> channels_mean_cast_fp16 = reduce_mean(axes = var_2253, keep_dims = var_2249, x = inputs_cast_fp16)[name = tensor<string, []>("channels_mean_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor<string, []>("zero_mean_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor<string, []>("zero_mean_sq_cast_fp16")];
+            tensor<int32, [1]> var_2257 = const()[name = tensor<string, []>("op_2257"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1500]> var_2258_cast_fp16 = reduce_mean(axes = var_2257, keep_dims = var_2249, x = zero_mean_sq_cast_fp16)[name = tensor<string, []>("op_2258_cast_fp16")];
+            tensor<fp16, []> var_2259_to_fp16 = const()[name = tensor<string, []>("op_2259_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1500]> var_2260_cast_fp16 = add(x = var_2258_cast_fp16, y = var_2259_to_fp16)[name = tensor<string, []>("op_2260_cast_fp16")];
+            tensor<fp16, []> denom_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1500]> denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_2260_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
+            tensor<fp16, [384]> encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16421120)))];
+            tensor<fp16, [384]> encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16421952)))];
+            tensor<fp16, []> encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor<string, []>("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("encoder_output_embeds_type_fp32_cast_fp16")];
+        } -> (encoder_output_embeds);
+}
\ No newline at end of file
diff --git a/openai_whisper-tiny/AudioEncoder.mlmodelc/model.mlmodel b/openai_whisper-tiny/AudioEncoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..1d7233cedc3da4c7344f4be5a83781a9603b04fe
--- /dev/null
+++ b/openai_whisper-tiny/AudioEncoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:030d64a3ddd296d6f709691a66a870aab7ee9f19e5fe07e8086245fb85302802
+size 54965
diff --git a/openai_whisper-tiny/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-tiny/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..72b7a099415499f378fda9fc4c827352894abc37
--- /dev/null
+++ b/openai_whisper-tiny/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcd0879f6d1c61832765c7ec05d883d0dcbf1504057b13095fd315484196fc5e
+size 16422784
diff --git a/openai_whisper-tiny/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-tiny/MelSpectrogram.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8cce829d60c4056be93333a562d47d3bb2908b9b
--- /dev/null
+++ b/openai_whisper-tiny/MelSpectrogram.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f77e6457285248f99cd7aa3fd4cc2efbb17733e63e7023ac53abe1f95785d07
+size 243
diff --git a/openai_whisper-tiny/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-tiny/MelSpectrogram.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b1d9e7a102f740c68cdfc7272dc5b8007c48416a
--- /dev/null
+++ b/openai_whisper-tiny/MelSpectrogram.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dabdc5aa69f6ef4d97dc9499f5c30514e00e96b53b750b33a5a6471363c71662
+size 328
diff --git a/openai_whisper-tiny/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-tiny/MelSpectrogram.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..2d1d4a4a0390fd84f87e4780bd5c76c066220991
--- /dev/null
+++ b/openai_whisper-tiny/MelSpectrogram.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Pad" : 1,
+      "Ios16.mul" : 2,
+      "SliceByIndex" : 1,
+      "Ios16.sub" : 1,
+      "Ios16.log" : 1,
+      "Ios16.conv" : 2,
+      "Ios16.add" : 3,
+      "Ios16.square" : 2,
+      "Ios16.matmul" : 1,
+      "Squeeze" : 2,
+      "Ios16.maximum" : 1,
+      "ExpandDims" : 4,
+      "Ios16.reduceMax" : 1,
+      "Identity" : 1,
+      "Ios16.reshape" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "7.1",
+      "com.github.apple.coremltools.source" : "torch==2.2.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "audio",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "MelSpectrogram",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-tiny/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-tiny/MelSpectrogram.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..a63d7fa99d6d86db1b76a1f53640cb4aa25e0210
--- /dev/null
+++ b/openai_whisper-tiny/MelSpectrogram.mlmodelc/model.mil
@@ -0,0 +1,66 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})]
+{
+    func main<ios16>(tensor<fp16, [480000]> audio) {
+            tensor<int32, [3]> var_10 = const()[name = tensor<string, []>("op_10"), val = tensor<int32, [3]>([1, 1, 480000])];
+            tensor<fp16, [1, 1, 480000]> input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<int32, [6]> input_3_pad_0 = const()[name = tensor<string, []>("input_3_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 200, 200])];
+            tensor<string, []> input_3_mode_0 = const()[name = tensor<string, []>("input_3_mode_0"), val = tensor<string, []>("reflect")];
+            tensor<fp16, []> input_3_constant_val_0_to_fp16 = const()[name = tensor<string, []>("input_3_constant_val_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
+            tensor<fp16, [1, 1, 480400]> input_3_cast_fp16 = pad(constant_val = input_3_constant_val_0_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<int32, [1]> var_22 = const()[name = tensor<string, []>("op_22"), val = tensor<int32, [1]>([480400])];
+            tensor<fp16, [480400]> input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = tensor<string, []>("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 480400]> expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = tensor<string, []>("expand_dims_0_cast_fp16")];
+            tensor<int32, [1]> expand_dims_3 = const()[name = tensor<string, []>("expand_dims_3"), val = tensor<int32, [1]>([160])];
+            tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = tensor<string, []>("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 480400]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = tensor<string, []>("expand_dims_4_cast_fp16")];
+            tensor<string, []> conv_0_pad_type_0 = const()[name = tensor<string, []>("conv_0_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> conv_0_pad_0 = const()[name = tensor<string, []>("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_0_dilations_0 = const()[name = tensor<string, []>("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> conv_0_groups_0 = const()[name = tensor<string, []>("conv_0_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_1_to_fp16 = const()[name = tensor<string, []>("expand_dims_1_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1, 201, 3001]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_0_cast_fp16")];
+            tensor<string, []> conv_1_pad_type_0 = const()[name = tensor<string, []>("conv_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> conv_1_pad_0 = const()[name = tensor<string, []>("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_1_dilations_0 = const()[name = tensor<string, []>("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> conv_1_groups_0 = const()[name = tensor<string, []>("conv_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_2_to_fp16 = const()[name = tensor<string, []>("expand_dims_2_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160960)))];
+            tensor<fp16, [1, 201, 3001]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_1_cast_fp16")];
+            tensor<int32, [1]> squeeze_0_axes_0 = const()[name = tensor<string, []>("squeeze_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = tensor<string, []>("squeeze_0_cast_fp16")];
+            tensor<int32, [1]> squeeze_1_axes_0 = const()[name = tensor<string, []>("squeeze_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = tensor<string, []>("squeeze_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = tensor<string, []>("square_0_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = tensor<string, []>("square_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = tensor<string, []>("add_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = tensor<string, []>("magnitudes_1_cast_fp16")];
+            tensor<int32, [2]> magnitudes_begin_0 = const()[name = tensor<string, []>("magnitudes_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [2]> magnitudes_end_0 = const()[name = tensor<string, []>("magnitudes_end_0"), val = tensor<int32, [2]>([201, 3000])];
+            tensor<bool, [2]> magnitudes_end_mask_0 = const()[name = tensor<string, []>("magnitudes_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [201, 3000]> magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = tensor<string, []>("magnitudes_cast_fp16")];
+            tensor<bool, []> mel_spec_1_transpose_x_0 = const()[name = tensor<string, []>("mel_spec_1_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> mel_spec_1_transpose_y_0 = const()[name = tensor<string, []>("mel_spec_1_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [80, 201]> mel_filters_to_fp16 = const()[name = tensor<string, []>("mel_filters_to_fp16"), val = tensor<fp16, [80, 201]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(321856)))];
+            tensor<fp16, [80, 3000]> mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = tensor<string, []>("mel_spec_1_cast_fp16")];
+            tensor<fp16, []> var_41_to_fp16 = const()[name = tensor<string, []>("op_41_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [80, 3000]> mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = tensor<string, []>("mel_spec_cast_fp16")];
+            tensor<fp16, []> log_0_epsilon_0_to_fp16 = const()[name = tensor<string, []>("log_0_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
+            tensor<fp16, [80, 3000]> log_0_cast_fp16 = log(epsilon = log_0_epsilon_0_to_fp16, x = mel_spec_cast_fp16)[name = tensor<string, []>("log_0_cast_fp16")];
+            tensor<fp16, []> mul_0_y_0_to_fp16 = const()[name = tensor<string, []>("mul_0_y_0_to_fp16"), val = tensor<fp16, []>(0x1.bccp-2)];
+            tensor<fp16, [80, 3000]> mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = tensor<string, []>("mul_0_cast_fp16")];
+            tensor<bool, []> var_44_keep_dims_0 = const()[name = tensor<string, []>("op_44_keep_dims_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, []> var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
+            tensor<fp16, []> var_46_to_fp16 = const()[name = tensor<string, []>("op_46_to_fp16"), val = tensor<fp16, []>(0x1p+3)];
+            tensor<fp16, []> var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = tensor<string, []>("op_47_cast_fp16")];
+            tensor<fp16, [80, 3000]> log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = tensor<string, []>("log_spec_3_cast_fp16")];
+            tensor<fp16, []> var_50_to_fp16 = const()[name = tensor<string, []>("op_50_to_fp16"), val = tensor<fp16, []>(0x1p+2)];
+            tensor<fp16, [80, 3000]> var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = tensor<string, []>("op_51_cast_fp16")];
+            tensor<fp16, []> _inversed_log_spec_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_log_spec_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-2)];
+            tensor<fp16, [80, 3000]> _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = tensor<string, []>("_inversed_log_spec_cast_fp16")];
+            tensor<int32, [1]> var_55_axes_0 = const()[name = tensor<string, []>("op_55_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 80, 3000]> var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = tensor<string, []>("op_55_cast_fp16")];
+            tensor<int32, [1]> var_62_axes_0 = const()[name = tensor<string, []>("op_62_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
+        } -> (melspectrogram_features);
+}
\ No newline at end of file
diff --git a/openai_whisper-tiny/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-tiny/MelSpectrogram.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..38b665c4196c5c8ee613e45967c28f2fb70beffa
--- /dev/null
+++ b/openai_whisper-tiny/MelSpectrogram.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b65b76f4e1dab57239e3946f6ab1314a7d1fdfa114485683dd04476ca62adb6
+size 354080
diff --git a/openai_whisper-tiny/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-tiny/TextDecoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..72c51452b03b50a547955b59cb25bd234310f19d
--- /dev/null
+++ b/openai_whisper-tiny/TextDecoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfbe102ae5fb9368974a077f780441dd222fdfb0c7778c1df227ef6a73cbaada
+size 243
diff --git a/openai_whisper-tiny/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-tiny/TextDecoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4c6ec9df05ac3db18915c50d2ba4a4d3721c601
--- /dev/null
+++ b/openai_whisper-tiny/TextDecoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:292f96416a33f9a80aaa62ead3dd5206aee6c5e6b3ac6cc02c059d38cbf04c6a
+size 633
diff --git a/openai_whisper-tiny/TextDecoder.mlmodelc/metadata.json b/openai_whisper-tiny/TextDecoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..3ac3ea39478a97e35d648f7d835a10f461cc4a20
--- /dev/null
+++ b/openai_whisper-tiny/TextDecoder.mlmodelc/metadata.json
@@ -0,0 +1,165 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 51865)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 51865]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536, 1, 1]",
+        "name" : "key_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536, 1, 1]",
+        "name" : "value_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500]",
+        "name" : "alignment_heads_weights",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Split" : 2,
+      "Concat" : 3,
+      "Ios16.rsqrt" : 13,
+      "Ios16.mul" : 50,
+      "Squeeze" : 1,
+      "SliceByIndex" : 12,
+      "Ios16.sub" : 14,
+      "Transpose" : 1,
+      "Ios16.conv" : 40,
+      "Ios16.add" : 38,
+      "Ios16.linear" : 1,
+      "Ios16.matmul" : 16,
+      "Ios16.gelu" : 4,
+      "Ios16.reduceMean" : 27,
+      "ExpandDims" : 6,
+      "Ios16.batchNorm" : 13,
+      "Ios16.gather" : 2,
+      "Ios16.reshape" : 32,
+      "Ios16.softmax" : 8
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.1",
+      "com.github.apple.coremltools.version" : "7.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "cache_length",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536 × 1 × 224)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536, 1, 224]",
+        "name" : "key_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536 × 1 × 224)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536, 1, 224]",
+        "name" : "value_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 224)",
+        "shortDescription" : "",
+        "shape" : "[1, 224]",
+        "name" : "kv_cache_update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 384 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 384, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 224)",
+        "shortDescription" : "",
+        "shape" : "[1, 224]",
+        "name" : "decoder_key_padding_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "TextDecoder",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-tiny/TextDecoder.mlmodelc/model.mil b/openai_whisper-tiny/TextDecoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..0803b55bff70f043787edb3e629f5ef27c806b44
--- /dev/null
+++ b/openai_whisper-tiny/TextDecoder.mlmodelc/model.mil
@@ -0,0 +1,773 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})]
+{
+    func main<ios16>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 224]> decoder_key_padding_mask, tensor<fp16, [1, 384, 1, 1500]> encoder_output_embeds, tensor<int32, [1]> input_ids, tensor<fp16, [1, 1536, 1, 224]> key_cache, tensor<fp16, [1, 224]> kv_cache_update_mask, tensor<fp16, [1, 1536, 1, 224]> value_cache) {
+            tensor<int32, []> var_24_axis_0 = const()[name = tensor<string, []>("op_24_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> var_24_batch_dims_0 = const()[name = tensor<string, []>("op_24_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [51865, 384]> embed_tokens_weight_to_fp16 = const()[name = tensor<string, []>("embed_tokens_weight_to_fp16"), val = tensor<fp16, [51865, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1, 384]> var_24_cast_fp16 = gather(axis = var_24_axis_0, batch_dims = var_24_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor<string, []>("op_24_cast_fp16")];
+            tensor<int32, []> var_28_axis_0 = const()[name = tensor<string, []>("op_28_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> var_28_batch_dims_0 = const()[name = tensor<string, []>("op_28_batch_dims_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [448, 384]> embed_positions_weight_to_fp16 = const()[name = tensor<string, []>("embed_positions_weight_to_fp16"), val = tensor<fp16, [448, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39832448)))];
+            tensor<fp16, [1, 384]> var_28_cast_fp16 = gather(axis = var_28_axis_0, batch_dims = var_28_batch_dims_0, indices = cache_length, x = embed_positions_weight_to_fp16)[name = tensor<string, []>("op_28_cast_fp16")];
+            tensor<fp16, [1, 384]> hidden_states_1_cast_fp16 = add(x = var_24_cast_fp16, y = var_28_cast_fp16)[name = tensor<string, []>("hidden_states_1_cast_fp16")];
+            tensor<int32, [1]> var_42_axes_0 = const()[name = tensor<string, []>("op_42_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 384, 1]> var_42_cast_fp16 = expand_dims(axes = var_42_axes_0, x = hidden_states_1_cast_fp16)[name = tensor<string, []>("op_42_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([3])];
+            tensor<fp16, [1, 384, 1, 1]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_42_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, [4]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [4]>([384, 384, 384, 384])];
+            tensor<int32, []> var_47_axis_0 = const()[name = tensor<string, []>("op_47_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 384, 1, 224]> var_47_cast_fp16_0, tensor<fp16, [1, 384, 1, 224]> var_47_cast_fp16_1, tensor<fp16, [1, 384, 1, 224]> var_47_cast_fp16_2, tensor<fp16, [1, 384, 1, 224]> var_47_cast_fp16_3 = split(axis = var_47_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor<string, []>("op_47_cast_fp16")];
+            tensor<int32, [4]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [4]>([384, 384, 384, 384])];
+            tensor<int32, []> var_54_axis_0 = const()[name = tensor<string, []>("op_54_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 384, 1, 224]> var_54_cast_fp16_0, tensor<fp16, [1, 384, 1, 224]> var_54_cast_fp16_1, tensor<fp16, [1, 384, 1, 224]> var_54_cast_fp16_2, tensor<fp16, [1, 384, 1, 224]> var_54_cast_fp16_3 = split(axis = var_54_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor<string, []>("op_54_cast_fp16")];
+            tensor<int32, []> var_64 = const()[name = tensor<string, []>("op_64"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_71 = const()[name = tensor<string, []>("op_71"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_72 = const()[name = tensor<string, []>("op_72"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_84 = const()[name = tensor<string, []>("op_84"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_1_cast_fp16 = reduce_mean(axes = var_84, keep_dims = var_72, x = inputs_1_cast_fp16)[name = tensor<string, []>("channels_mean_1_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor<string, []>("zero_mean_1_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor<string, []>("zero_mean_sq_1_cast_fp16")];
+            tensor<int32, [1]> var_88 = const()[name = tensor<string, []>("op_88"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_89_cast_fp16 = reduce_mean(axes = var_88, keep_dims = var_72, x = zero_mean_sq_1_cast_fp16)[name = tensor<string, []>("op_89_cast_fp16")];
+            tensor<fp16, []> var_90_to_fp16 = const()[name = tensor<string, []>("op_90_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_91_cast_fp16 = add(x = var_89_cast_fp16, y = var_90_to_fp16)[name = tensor<string, []>("op_91_cast_fp16")];
+            tensor<fp16, []> denom_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, x = var_91_cast_fp16)[name = tensor<string, []>("denom_1_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
+            tensor<fp16, [384]> obj_1_mean_0_to_fp16 = const()[name = tensor<string, []>("obj_1_mean_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40176576)))];
+            tensor<fp16, [384]> obj_1_variance_0_to_fp16 = const()[name = tensor<string, []>("obj_1_variance_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40177408)))];
+            tensor<fp16, [384]> obj_1_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40178240)))];
+            tensor<fp16, [384]> obj_1_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_1_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40179072)))];
+            tensor<fp16, []> obj_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor<string, []>("obj_1_cast_fp16")];
+            tensor<int32, [2]> var_106 = const()[name = tensor<string, []>("op_106"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_108 = const()[name = tensor<string, []>("op_108"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_1_pad_type_0 = const()[name = tensor<string, []>("query_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_1_pad_0 = const()[name = tensor<string, []>("query_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40179904)))];
+            tensor<fp16, [384]> layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40474880)))];
+            tensor<fp16, [1, 384, 1, 1]> query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_108, groups = var_71, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_106, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("query_1_cast_fp16")];
+            tensor<int32, [2]> var_112 = const()[name = tensor<string, []>("op_112"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_114 = const()[name = tensor<string, []>("op_114"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_1_pad_type_0 = const()[name = tensor<string, []>("current_key_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_1_pad_0 = const()[name = tensor<string, []>("current_key_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40475712)))];
+            tensor<fp16, [1, 384, 1, 1]> current_key_1_cast_fp16 = conv(dilations = var_114, groups = var_71, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = var_112, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("current_key_1_cast_fp16")];
+            tensor<int32, [2]> var_119 = const()[name = tensor<string, []>("op_119"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_121 = const()[name = tensor<string, []>("op_121"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_1_pad_type_0 = const()[name = tensor<string, []>("current_value_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_1_pad_0 = const()[name = tensor<string, []>("current_value_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40770688)))];
+            tensor<fp16, [384]> layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41065664)))];
+            tensor<fp16, [1, 384, 1, 1]> current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_121, groups = var_71, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = var_119, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("current_value_1_cast_fp16")];
+            tensor<int32, [1]> var_125_axes_0 = const()[name = tensor<string, []>("op_125_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 224]> var_125_cast_fp16 = expand_dims(axes = var_125_axes_0, x = kv_cache_update_mask)[name = tensor<string, []>("op_125_cast_fp16")];
+            tensor<int32, [1]> var_126_axes_0 = const()[name = tensor<string, []>("op_126_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 224]> var_126_cast_fp16 = expand_dims(axes = var_126_axes_0, x = var_125_cast_fp16)[name = tensor<string, []>("op_126_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_128_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_128_cast_fp16")];
+            tensor<fp16, []> var_65_to_fp16 = const()[name = tensor<string, []>("op_65_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
+            tensor<fp16, [1, 1, 1, 224]> var_129_cast_fp16 = sub(x = var_65_to_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_129_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_130_cast_fp16 = mul(x = var_47_cast_fp16_0, y = var_129_cast_fp16)[name = tensor<string, []>("op_130_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> key_1_cast_fp16 = add(x = var_128_cast_fp16, y = var_130_cast_fp16)[name = tensor<string, []>("key_1_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_132_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_132_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_134_cast_fp16 = mul(x = var_54_cast_fp16_0, y = var_129_cast_fp16)[name = tensor<string, []>("op_134_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> value_1_cast_fp16 = add(x = var_132_cast_fp16, y = var_134_cast_fp16)[name = tensor<string, []>("value_1_cast_fp16")];
+            tensor<int32, [4]> var_137 = const()[name = tensor<string, []>("op_137"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> var_138_cast_fp16 = reshape(shape = var_137, x = query_1_cast_fp16)[name = tensor<string, []>("op_138_cast_fp16")];
+            tensor<fp16, []> var_139_to_fp16 = const()[name = tensor<string, []>("op_139_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_140_cast_fp16 = mul(x = var_138_cast_fp16, y = var_139_to_fp16)[name = tensor<string, []>("op_140_cast_fp16")];
+            tensor<int32, [4]> var_141 = const()[name = tensor<string, []>("op_141"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 224]> var_142_cast_fp16 = reshape(shape = var_141, x = key_1_cast_fp16)[name = tensor<string, []>("op_142_cast_fp16")];
+            tensor<bool, []> mh_w_1_transpose_x_0 = const()[name = tensor<string, []>("mh_w_1_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_1_transpose_y_0 = const()[name = tensor<string, []>("mh_w_1_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 224]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_140_cast_fp16, y = var_142_cast_fp16)[name = tensor<string, []>("mh_w_1_cast_fp16")];
+            tensor<int32, [1]> var_146_axes_0 = const()[name = tensor<string, []>("op_146_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 224]> var_146_cast_fp16 = expand_dims(axes = var_146_axes_0, x = decoder_key_padding_mask)[name = tensor<string, []>("op_146_cast_fp16")];
+            tensor<int32, [1]> var_147_axes_0 = const()[name = tensor<string, []>("op_147_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 224]> var_147_cast_fp16 = expand_dims(axes = var_147_axes_0, x = var_146_cast_fp16)[name = tensor<string, []>("op_147_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 224]> mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 224]> var_150_cast_fp16 = softmax(axis = var_64, x = mh_w_3_cast_fp16)[name = tensor<string, []>("op_150_cast_fp16")];
+            tensor<int32, [4]> var_151 = const()[name = tensor<string, []>("op_151"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 224]> var_152_cast_fp16 = reshape(shape = var_151, x = value_1_cast_fp16)[name = tensor<string, []>("op_152_cast_fp16")];
+            tensor<bool, []> attn_1_transpose_x_0 = const()[name = tensor<string, []>("attn_1_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_1_transpose_y_0 = const()[name = tensor<string, []>("attn_1_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_152_cast_fp16, y = var_150_cast_fp16)[name = tensor<string, []>("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_155 = const()[name = tensor<string, []>("op_155"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_1_cast_fp16 = reshape(shape = var_155, x = attn_1_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<int32, [2]> var_159 = const()[name = tensor<string, []>("op_159"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_161 = const()[name = tensor<string, []>("op_161"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_7_pad_type_0 = const()[name = tensor<string, []>("obj_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_7_pad_0 = const()[name = tensor<string, []>("obj_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41066496)))];
+            tensor<fp16, [384]> layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41361472)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_7_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_161, groups = var_71, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_159, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("obj_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> var_171 = const()[name = tensor<string, []>("op_171"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_3_cast_fp16 = reduce_mean(axes = var_171, keep_dims = var_72, x = inputs_3_cast_fp16)[name = tensor<string, []>("channels_mean_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor<string, []>("zero_mean_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor<string, []>("zero_mean_sq_3_cast_fp16")];
+            tensor<int32, [1]> var_175 = const()[name = tensor<string, []>("op_175"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_176_cast_fp16 = reduce_mean(axes = var_175, keep_dims = var_72, x = zero_mean_sq_3_cast_fp16)[name = tensor<string, []>("op_176_cast_fp16")];
+            tensor<fp16, []> var_177_to_fp16 = const()[name = tensor<string, []>("op_177_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_178_cast_fp16 = add(x = var_176_cast_fp16, y = var_177_to_fp16)[name = tensor<string, []>("op_178_cast_fp16")];
+            tensor<fp16, []> denom_3_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_3_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_178_cast_fp16)[name = tensor<string, []>("denom_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
+            tensor<fp16, [384]> obj_9_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41362304)))];
+            tensor<fp16, [384]> obj_9_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_9_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41363136)))];
+            tensor<fp16, []> obj_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor<string, []>("obj_9_cast_fp16")];
+            tensor<int32, [2]> var_193 = const()[name = tensor<string, []>("op_193"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_195 = const()[name = tensor<string, []>("op_195"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_3_pad_type_0 = const()[name = tensor<string, []>("query_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_3_pad_0 = const()[name = tensor<string, []>("query_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41363968)))];
+            tensor<fp16, [384]> layers_0_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41658944)))];
+            tensor<fp16, [1, 384, 1, 1]> query_3_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_bias_to_fp16, dilations = var_195, groups = var_71, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_193, weight = layers_0_encoder_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
+            tensor<int32, [2]> var_199 = const()[name = tensor<string, []>("op_199"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_201 = const()[name = tensor<string, []>("op_201"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_3_pad_type_0 = const()[name = tensor<string, []>("key_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_3_pad_0 = const()[name = tensor<string, []>("key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41659776)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_3_cast_fp16 = conv(dilations = var_201, groups = var_71, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_199, weight = layers_0_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_3_cast_fp16")];
+            tensor<int32, [2]> var_206 = const()[name = tensor<string, []>("op_206"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_208 = const()[name = tensor<string, []>("op_208"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_3_pad_type_0 = const()[name = tensor<string, []>("value_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_3_pad_0 = const()[name = tensor<string, []>("value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41954752)))];
+            tensor<fp16, [384]> layers_0_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42249728)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_3_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_bias_to_fp16, dilations = var_208, groups = var_71, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_206, weight = layers_0_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_3_cast_fp16")];
+            tensor<int32, [4]> var_212 = const()[name = tensor<string, []>("op_212"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> var_213_cast_fp16 = reshape(shape = var_212, x = query_3_cast_fp16)[name = tensor<string, []>("op_213_cast_fp16")];
+            tensor<fp16, []> var_214_to_fp16 = const()[name = tensor<string, []>("op_214_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_215_cast_fp16 = mul(x = var_213_cast_fp16, y = var_214_to_fp16)[name = tensor<string, []>("op_215_cast_fp16")];
+            tensor<int32, [4]> var_216 = const()[name = tensor<string, []>("op_216"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_217_cast_fp16 = reshape(shape = var_216, x = key_3_cast_fp16)[name = tensor<string, []>("op_217_cast_fp16")];
+            tensor<bool, []> mh_w_5_transpose_x_0 = const()[name = tensor<string, []>("mh_w_5_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_5_transpose_y_0 = const()[name = tensor<string, []>("mh_w_5_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_215_cast_fp16, y = var_217_cast_fp16)[name = tensor<string, []>("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 1500]> obj_13_cast_fp16 = softmax(axis = var_64, x = mh_w_5_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
+            tensor<int32, [4]> var_221 = const()[name = tensor<string, []>("op_221"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_222_cast_fp16 = reshape(shape = var_221, x = value_3_cast_fp16)[name = tensor<string, []>("op_222_cast_fp16")];
+            tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_222_cast_fp16, y = obj_13_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_225 = const()[name = tensor<string, []>("op_225"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_3_cast_fp16 = reshape(shape = var_225, x = attn_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<int32, [2]> var_229 = const()[name = tensor<string, []>("op_229"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_231 = const()[name = tensor<string, []>("op_231"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_11_pad_type_0 = const()[name = tensor<string, []>("obj_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_11_pad_0 = const()[name = tensor<string, []>("obj_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_0_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42250560)))];
+            tensor<fp16, [384]> layers_0_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42545536)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_11_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_bias_to_fp16, dilations = var_231, groups = var_71, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_229, weight = layers_0_encoder_attn_o_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("obj_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, [1]> var_237 = const()[name = tensor<string, []>("op_237"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_5_cast_fp16 = reduce_mean(axes = var_237, keep_dims = var_72, x = inputs_5_cast_fp16)[name = tensor<string, []>("channels_mean_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = channels_mean_5_cast_fp16)[name = tensor<string, []>("zero_mean_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor<string, []>("zero_mean_sq_5_cast_fp16")];
+            tensor<int32, [1]> var_241 = const()[name = tensor<string, []>("op_241"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_242_cast_fp16 = reduce_mean(axes = var_241, keep_dims = var_72, x = zero_mean_sq_5_cast_fp16)[name = tensor<string, []>("op_242_cast_fp16")];
+            tensor<fp16, []> var_243_to_fp16 = const()[name = tensor<string, []>("op_243_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_244_cast_fp16 = add(x = var_242_cast_fp16, y = var_243_to_fp16)[name = tensor<string, []>("op_244_cast_fp16")];
+            tensor<fp16, []> denom_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_244_cast_fp16)[name = tensor<string, []>("denom_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
+            tensor<fp16, [384]> input_5_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_5_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42546368)))];
+            tensor<fp16, [384]> input_5_beta_0_to_fp16 = const()[name = tensor<string, []>("input_5_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42547200)))];
+            tensor<fp16, []> input_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<int32, [2]> var_255 = const()[name = tensor<string, []>("op_255"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_257 = const()[name = tensor<string, []>("op_257"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_7_pad_type_0 = const()[name = tensor<string, []>("input_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_7_pad_0 = const()[name = tensor<string, []>("input_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1536, 384, 1, 1]> layers_0_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42548032)))];
+            tensor<fp16, [1536]> layers_0_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43727744)))];
+            tensor<fp16, [1, 1536, 1, 1]> input_7_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_257, groups = var_71, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_255, weight = layers_0_fc1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_mode_0 = const()[name = tensor<string, []>("input_9_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1]> input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<int32, [2]> var_263 = const()[name = tensor<string, []>("op_263"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_265 = const()[name = tensor<string, []>("op_265"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_3_pad_type_0 = const()[name = tensor<string, []>("hidden_states_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_3_pad_0 = const()[name = tensor<string, []>("hidden_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 1536, 1, 1]> layers_0_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43730880)))];
+            tensor<fp16, [384]> layers_0_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44910592)))];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_3_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_265, groups = var_71, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = var_263, weight = layers_0_fc2_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, []> var_278 = const()[name = tensor<string, []>("op_278"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_285 = const()[name = tensor<string, []>("op_285"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_286 = const()[name = tensor<string, []>("op_286"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_298 = const()[name = tensor<string, []>("op_298"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_7_cast_fp16 = reduce_mean(axes = var_298, keep_dims = var_286, x = inputs_7_cast_fp16)[name = tensor<string, []>("channels_mean_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor<string, []>("zero_mean_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor<string, []>("zero_mean_sq_7_cast_fp16")];
+            tensor<int32, [1]> var_302 = const()[name = tensor<string, []>("op_302"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_303_cast_fp16 = reduce_mean(axes = var_302, keep_dims = var_286, x = zero_mean_sq_7_cast_fp16)[name = tensor<string, []>("op_303_cast_fp16")];
+            tensor<fp16, []> var_304_to_fp16 = const()[name = tensor<string, []>("op_304_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_305_cast_fp16 = add(x = var_303_cast_fp16, y = var_304_to_fp16)[name = tensor<string, []>("op_305_cast_fp16")];
+            tensor<fp16, []> denom_7_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_7_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_305_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
+            tensor<fp16, [384]> obj_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_15_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44911424)))];
+            tensor<fp16, [384]> obj_15_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_15_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44912256)))];
+            tensor<fp16, []> obj_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("obj_15_cast_fp16")];
+            tensor<int32, [2]> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_322 = const()[name = tensor<string, []>("op_322"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44913088)))];
+            tensor<fp16, [384]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45208064)))];
+            tensor<fp16, [1, 384, 1, 1]> query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_322, groups = var_285, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_320, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
+            tensor<int32, [2]> var_326 = const()[name = tensor<string, []>("op_326"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_328 = const()[name = tensor<string, []>("op_328"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_3_pad_type_0 = const()[name = tensor<string, []>("current_key_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_3_pad_0 = const()[name = tensor<string, []>("current_key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45208896)))];
+            tensor<fp16, [1, 384, 1, 1]> current_key_3_cast_fp16 = conv(dilations = var_328, groups = var_285, pad = current_key_3_pad_0, pad_type = current_key_3_pad_type_0, strides = var_326, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_key_3_cast_fp16")];
+            tensor<int32, [2]> var_333 = const()[name = tensor<string, []>("op_333"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_335 = const()[name = tensor<string, []>("op_335"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_3_pad_type_0 = const()[name = tensor<string, []>("current_value_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_3_pad_0 = const()[name = tensor<string, []>("current_value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45503872)))];
+            tensor<fp16, [384]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45798848)))];
+            tensor<fp16, [1, 384, 1, 1]> current_value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_335, groups = var_285, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = var_333, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_value_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_342_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_342_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_344_cast_fp16 = mul(x = var_47_cast_fp16_1, y = var_129_cast_fp16)[name = tensor<string, []>("op_344_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> key_5_cast_fp16 = add(x = var_342_cast_fp16, y = var_344_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_346_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_346_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_348_cast_fp16 = mul(x = var_54_cast_fp16_1, y = var_129_cast_fp16)[name = tensor<string, []>("op_348_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> value_5_cast_fp16 = add(x = var_346_cast_fp16, y = var_348_cast_fp16)[name = tensor<string, []>("value_5_cast_fp16")];
+            tensor<int32, [4]> var_351 = const()[name = tensor<string, []>("op_351"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> var_352_cast_fp16 = reshape(shape = var_351, x = query_5_cast_fp16)[name = tensor<string, []>("op_352_cast_fp16")];
+            tensor<fp16, []> var_353_to_fp16 = const()[name = tensor<string, []>("op_353_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_354_cast_fp16 = mul(x = var_352_cast_fp16, y = var_353_to_fp16)[name = tensor<string, []>("op_354_cast_fp16")];
+            tensor<int32, [4]> var_355 = const()[name = tensor<string, []>("op_355"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 224]> var_356_cast_fp16 = reshape(shape = var_355, x = key_5_cast_fp16)[name = tensor<string, []>("op_356_cast_fp16")];
+            tensor<bool, []> mh_w_7_transpose_x_0 = const()[name = tensor<string, []>("mh_w_7_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_7_transpose_y_0 = const()[name = tensor<string, []>("mh_w_7_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 224]> mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_354_cast_fp16, y = var_356_cast_fp16)[name = tensor<string, []>("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 224]> mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 224]> var_364_cast_fp16 = softmax(axis = var_278, x = mh_w_9_cast_fp16)[name = tensor<string, []>("op_364_cast_fp16")];
+            tensor<int32, [4]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 224]> var_366_cast_fp16 = reshape(shape = var_365, x = value_5_cast_fp16)[name = tensor<string, []>("op_366_cast_fp16")];
+            tensor<bool, []> attn_5_transpose_x_0 = const()[name = tensor<string, []>("attn_5_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_5_transpose_y_0 = const()[name = tensor<string, []>("attn_5_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_366_cast_fp16, y = var_364_cast_fp16)[name = tensor<string, []>("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_369 = const()[name = tensor<string, []>("op_369"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_11_cast_fp16 = reshape(shape = var_369, x = attn_5_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<int32, [2]> var_373 = const()[name = tensor<string, []>("op_373"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_375 = const()[name = tensor<string, []>("op_375"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_21_pad_type_0 = const()[name = tensor<string, []>("obj_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_21_pad_0 = const()[name = tensor<string, []>("obj_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45799680)))];
+            tensor<fp16, [384]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46094656)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_21_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_375, groups = var_285, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = var_373, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, [1]> var_385 = const()[name = tensor<string, []>("op_385"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_9_cast_fp16 = reduce_mean(axes = var_385, keep_dims = var_286, x = inputs_9_cast_fp16)[name = tensor<string, []>("channels_mean_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_sq_9_cast_fp16")];
+            tensor<int32, [1]> var_389 = const()[name = tensor<string, []>("op_389"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_390_cast_fp16 = reduce_mean(axes = var_389, keep_dims = var_286, x = zero_mean_sq_9_cast_fp16)[name = tensor<string, []>("op_390_cast_fp16")];
+            tensor<fp16, []> var_391_to_fp16 = const()[name = tensor<string, []>("op_391_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_392_cast_fp16 = add(x = var_390_cast_fp16, y = var_391_to_fp16)[name = tensor<string, []>("op_392_cast_fp16")];
+            tensor<fp16, []> denom_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_392_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
+            tensor<fp16, [384]> obj_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_23_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46095488)))];
+            tensor<fp16, [384]> obj_23_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_23_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46096320)))];
+            tensor<fp16, []> obj_23_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_23_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
+            tensor<int32, [2]> var_407 = const()[name = tensor<string, []>("op_407"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_409 = const()[name = tensor<string, []>("op_409"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_7_pad_type_0 = const()[name = tensor<string, []>("query_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_7_pad_0 = const()[name = tensor<string, []>("query_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46097152)))];
+            tensor<fp16, [384]> layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46392128)))];
+            tensor<fp16, [1, 384, 1, 1]> query_7_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = var_409, groups = var_285, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_407, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
+            tensor<int32, [2]> var_413 = const()[name = tensor<string, []>("op_413"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_415 = const()[name = tensor<string, []>("op_415"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_7_pad_type_0 = const()[name = tensor<string, []>("key_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_7_pad_0 = const()[name = tensor<string, []>("key_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46392960)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_7_cast_fp16 = conv(dilations = var_415, groups = var_285, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_413, weight = layers_1_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_7_cast_fp16")];
+            tensor<int32, [2]> var_420 = const()[name = tensor<string, []>("op_420"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_422 = const()[name = tensor<string, []>("op_422"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_7_pad_type_0 = const()[name = tensor<string, []>("value_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_7_pad_0 = const()[name = tensor<string, []>("value_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46687936)))];
+            tensor<fp16, [384]> layers_1_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46982912)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_7_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_bias_to_fp16, dilations = var_422, groups = var_285, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_420, weight = layers_1_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_7_cast_fp16")];
+            tensor<int32, [4]> var_426 = const()[name = tensor<string, []>("op_426"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> var_427_cast_fp16 = reshape(shape = var_426, x = query_7_cast_fp16)[name = tensor<string, []>("op_427_cast_fp16")];
+            tensor<fp16, []> var_428_to_fp16 = const()[name = tensor<string, []>("op_428_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_429_cast_fp16 = mul(x = var_427_cast_fp16, y = var_428_to_fp16)[name = tensor<string, []>("op_429_cast_fp16")];
+            tensor<int32, [4]> var_430 = const()[name = tensor<string, []>("op_430"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_431_cast_fp16 = reshape(shape = var_430, x = key_7_cast_fp16)[name = tensor<string, []>("op_431_cast_fp16")];
+            tensor<bool, []> mh_w_11_transpose_x_0 = const()[name = tensor<string, []>("mh_w_11_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_11_transpose_y_0 = const()[name = tensor<string, []>("mh_w_11_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 1500]> mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_429_cast_fp16, y = var_431_cast_fp16)[name = tensor<string, []>("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 1500]> obj_27_cast_fp16 = softmax(axis = var_278, x = mh_w_11_cast_fp16)[name = tensor<string, []>("obj_27_cast_fp16")];
+            tensor<int32, [4]> var_435 = const()[name = tensor<string, []>("op_435"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_436_cast_fp16 = reshape(shape = var_435, x = value_7_cast_fp16)[name = tensor<string, []>("op_436_cast_fp16")];
+            tensor<bool, []> attn_7_transpose_x_0 = const()[name = tensor<string, []>("attn_7_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_7_transpose_y_0 = const()[name = tensor<string, []>("attn_7_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_436_cast_fp16, y = obj_27_cast_fp16)[name = tensor<string, []>("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_439 = const()[name = tensor<string, []>("op_439"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_13_cast_fp16 = reshape(shape = var_439, x = attn_7_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<int32, [2]> var_443 = const()[name = tensor<string, []>("op_443"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_445 = const()[name = tensor<string, []>("op_445"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_25_pad_type_0 = const()[name = tensor<string, []>("obj_25_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_25_pad_0 = const()[name = tensor<string, []>("obj_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46983744)))];
+            tensor<fp16, [384]> layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47278720)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_25_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = var_445, groups = var_285, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = var_443, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("obj_25_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> var_451 = const()[name = tensor<string, []>("op_451"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_11_cast_fp16 = reduce_mean(axes = var_451, keep_dims = var_286, x = inputs_11_cast_fp16)[name = tensor<string, []>("channels_mean_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_sq_11_cast_fp16")];
+            tensor<int32, [1]> var_455 = const()[name = tensor<string, []>("op_455"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_456_cast_fp16 = reduce_mean(axes = var_455, keep_dims = var_286, x = zero_mean_sq_11_cast_fp16)[name = tensor<string, []>("op_456_cast_fp16")];
+            tensor<fp16, []> var_457_to_fp16 = const()[name = tensor<string, []>("op_457_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_458_cast_fp16 = add(x = var_456_cast_fp16, y = var_457_to_fp16)[name = tensor<string, []>("op_458_cast_fp16")];
+            tensor<fp16, []> denom_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_458_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
+            tensor<fp16, [384]> input_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_15_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47279552)))];
+            tensor<fp16, [384]> input_15_beta_0_to_fp16 = const()[name = tensor<string, []>("input_15_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47280384)))];
+            tensor<fp16, []> input_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<int32, [2]> var_469 = const()[name = tensor<string, []>("op_469"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_471 = const()[name = tensor<string, []>("op_471"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1536, 384, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47281216)))];
+            tensor<fp16, [1536]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48460928)))];
+            tensor<fp16, [1, 1536, 1, 1]> input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_471, groups = var_285, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_469, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_mode_0 = const()[name = tensor<string, []>("input_19_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<int32, [2]> var_477 = const()[name = tensor<string, []>("op_477"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_479 = const()[name = tensor<string, []>("op_479"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 1536, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48464064)))];
+            tensor<fp16, [384]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49643776)))];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_479, groups = var_285, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_477, weight = layers_1_fc2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_492 = const()[name = tensor<string, []>("op_492"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_499 = const()[name = tensor<string, []>("op_499"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_500 = const()[name = tensor<string, []>("op_500"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_512 = const()[name = tensor<string, []>("op_512"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_13_cast_fp16 = reduce_mean(axes = var_512, keep_dims = var_500, x = inputs_13_cast_fp16)[name = tensor<string, []>("channels_mean_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor<string, []>("zero_mean_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor<string, []>("zero_mean_sq_13_cast_fp16")];
+            tensor<int32, [1]> var_516 = const()[name = tensor<string, []>("op_516"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_517_cast_fp16 = reduce_mean(axes = var_516, keep_dims = var_500, x = zero_mean_sq_13_cast_fp16)[name = tensor<string, []>("op_517_cast_fp16")];
+            tensor<fp16, []> var_518_to_fp16 = const()[name = tensor<string, []>("op_518_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_519_cast_fp16 = add(x = var_517_cast_fp16, y = var_518_to_fp16)[name = tensor<string, []>("op_519_cast_fp16")];
+            tensor<fp16, []> denom_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_519_cast_fp16)[name = tensor<string, []>("denom_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor<string, []>("out_13_cast_fp16")];
+            tensor<fp16, [384]> obj_29_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_29_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49644608)))];
+            tensor<fp16, [384]> obj_29_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_29_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49645440)))];
+            tensor<fp16, []> obj_29_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_29_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor<string, []>("obj_29_cast_fp16")];
+            tensor<int32, [2]> var_534 = const()[name = tensor<string, []>("op_534"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_536 = const()[name = tensor<string, []>("op_536"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_9_pad_type_0 = const()[name = tensor<string, []>("query_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_9_pad_0 = const()[name = tensor<string, []>("query_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49646272)))];
+            tensor<fp16, [384]> layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49941248)))];
+            tensor<fp16, [1, 384, 1, 1]> query_9_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_536, groups = var_499, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_534, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
+            tensor<int32, [2]> var_540 = const()[name = tensor<string, []>("op_540"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_542 = const()[name = tensor<string, []>("op_542"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_5_pad_type_0 = const()[name = tensor<string, []>("current_key_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_5_pad_0 = const()[name = tensor<string, []>("current_key_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49942080)))];
+            tensor<fp16, [1, 384, 1, 1]> current_key_5_cast_fp16 = conv(dilations = var_542, groups = var_499, pad = current_key_5_pad_0, pad_type = current_key_5_pad_type_0, strides = var_540, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("current_key_5_cast_fp16")];
+            tensor<int32, [2]> var_547 = const()[name = tensor<string, []>("op_547"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_549 = const()[name = tensor<string, []>("op_549"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_5_pad_type_0 = const()[name = tensor<string, []>("current_value_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_5_pad_0 = const()[name = tensor<string, []>("current_value_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50237056)))];
+            tensor<fp16, [384]> layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50532032)))];
+            tensor<fp16, [1, 384, 1, 1]> current_value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_549, groups = var_499, pad = current_value_5_pad_0, pad_type = current_value_5_pad_type_0, strides = var_547, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor<string, []>("current_value_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_556_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_556_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_558_cast_fp16 = mul(x = var_47_cast_fp16_2, y = var_129_cast_fp16)[name = tensor<string, []>("op_558_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> key_9_cast_fp16 = add(x = var_556_cast_fp16, y = var_558_cast_fp16)[name = tensor<string, []>("key_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_560_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_560_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_562_cast_fp16 = mul(x = var_54_cast_fp16_2, y = var_129_cast_fp16)[name = tensor<string, []>("op_562_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> value_9_cast_fp16 = add(x = var_560_cast_fp16, y = var_562_cast_fp16)[name = tensor<string, []>("value_9_cast_fp16")];
+            tensor<int32, [4]> var_565 = const()[name = tensor<string, []>("op_565"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> var_566_cast_fp16 = reshape(shape = var_565, x = query_9_cast_fp16)[name = tensor<string, []>("op_566_cast_fp16")];
+            tensor<fp16, []> var_567_to_fp16 = const()[name = tensor<string, []>("op_567_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_568_cast_fp16 = mul(x = var_566_cast_fp16, y = var_567_to_fp16)[name = tensor<string, []>("op_568_cast_fp16")];
+            tensor<int32, [4]> var_569 = const()[name = tensor<string, []>("op_569"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 224]> var_570_cast_fp16 = reshape(shape = var_569, x = key_9_cast_fp16)[name = tensor<string, []>("op_570_cast_fp16")];
+            tensor<bool, []> mh_w_13_transpose_x_0 = const()[name = tensor<string, []>("mh_w_13_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_13_transpose_y_0 = const()[name = tensor<string, []>("mh_w_13_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 224]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_568_cast_fp16, y = var_570_cast_fp16)[name = tensor<string, []>("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 224]> mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 224]> var_578_cast_fp16 = softmax(axis = var_492, x = mh_w_15_cast_fp16)[name = tensor<string, []>("op_578_cast_fp16")];
+            tensor<int32, [4]> var_579 = const()[name = tensor<string, []>("op_579"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 224]> var_580_cast_fp16 = reshape(shape = var_579, x = value_9_cast_fp16)[name = tensor<string, []>("op_580_cast_fp16")];
+            tensor<bool, []> attn_9_transpose_x_0 = const()[name = tensor<string, []>("attn_9_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_9_transpose_y_0 = const()[name = tensor<string, []>("attn_9_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_580_cast_fp16, y = var_578_cast_fp16)[name = tensor<string, []>("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_583 = const()[name = tensor<string, []>("op_583"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_21_cast_fp16 = reshape(shape = var_583, x = attn_9_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<int32, [2]> var_587 = const()[name = tensor<string, []>("op_587"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_589 = const()[name = tensor<string, []>("op_589"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_35_pad_type_0 = const()[name = tensor<string, []>("obj_35_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_35_pad_0 = const()[name = tensor<string, []>("obj_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50532864)))];
+            tensor<fp16, [384]> layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50827840)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_35_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_589, groups = var_499, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = var_587, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("obj_35_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> var_599 = const()[name = tensor<string, []>("op_599"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_15_cast_fp16 = reduce_mean(axes = var_599, keep_dims = var_500, x = inputs_15_cast_fp16)[name = tensor<string, []>("channels_mean_15_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor<string, []>("zero_mean_15_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor<string, []>("zero_mean_sq_15_cast_fp16")];
+            tensor<int32, [1]> var_603 = const()[name = tensor<string, []>("op_603"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_604_cast_fp16 = reduce_mean(axes = var_603, keep_dims = var_500, x = zero_mean_sq_15_cast_fp16)[name = tensor<string, []>("op_604_cast_fp16")];
+            tensor<fp16, []> var_605_to_fp16 = const()[name = tensor<string, []>("op_605_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_606_cast_fp16 = add(x = var_604_cast_fp16, y = var_605_to_fp16)[name = tensor<string, []>("op_606_cast_fp16")];
+            tensor<fp16, []> denom_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_606_cast_fp16)[name = tensor<string, []>("denom_15_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = denom_15_cast_fp16)[name = tensor<string, []>("out_15_cast_fp16")];
+            tensor<fp16, [384]> obj_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_37_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50828672)))];
+            tensor<fp16, [384]> obj_37_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_37_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50829504)))];
+            tensor<fp16, []> obj_37_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_37_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor<string, []>("obj_37_cast_fp16")];
+            tensor<int32, [2]> var_621 = const()[name = tensor<string, []>("op_621"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_623 = const()[name = tensor<string, []>("op_623"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_11_pad_type_0 = const()[name = tensor<string, []>("query_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_11_pad_0 = const()[name = tensor<string, []>("query_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50830336)))];
+            tensor<fp16, [384]> layers_2_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51125312)))];
+            tensor<fp16, [1, 384, 1, 1]> query_11_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_bias_to_fp16, dilations = var_623, groups = var_499, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = var_621, weight = layers_2_encoder_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor<string, []>("query_11_cast_fp16")];
+            tensor<int32, [2]> var_627 = const()[name = tensor<string, []>("op_627"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_629 = const()[name = tensor<string, []>("op_629"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_11_pad_type_0 = const()[name = tensor<string, []>("key_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_11_pad_0 = const()[name = tensor<string, []>("key_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51126144)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_11_cast_fp16 = conv(dilations = var_629, groups = var_499, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = var_627, weight = layers_2_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_11_cast_fp16")];
+            tensor<int32, [2]> var_634 = const()[name = tensor<string, []>("op_634"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_636 = const()[name = tensor<string, []>("op_636"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_11_pad_type_0 = const()[name = tensor<string, []>("value_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_11_pad_0 = const()[name = tensor<string, []>("value_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51421120)))];
+            tensor<fp16, [384]> layers_2_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51716096)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_11_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_bias_to_fp16, dilations = var_636, groups = var_499, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = var_634, weight = layers_2_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_11_cast_fp16")];
+            tensor<int32, [4]> var_640 = const()[name = tensor<string, []>("op_640"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> var_641_cast_fp16 = reshape(shape = var_640, x = query_11_cast_fp16)[name = tensor<string, []>("op_641_cast_fp16")];
+            tensor<fp16, []> var_642_to_fp16 = const()[name = tensor<string, []>("op_642_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_643_cast_fp16 = mul(x = var_641_cast_fp16, y = var_642_to_fp16)[name = tensor<string, []>("op_643_cast_fp16")];
+            tensor<int32, [4]> var_644 = const()[name = tensor<string, []>("op_644"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_645_cast_fp16 = reshape(shape = var_644, x = key_11_cast_fp16)[name = tensor<string, []>("op_645_cast_fp16")];
+            tensor<bool, []> mh_w_17_transpose_x_0 = const()[name = tensor<string, []>("mh_w_17_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_17_transpose_y_0 = const()[name = tensor<string, []>("mh_w_17_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 1500]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_643_cast_fp16, y = var_645_cast_fp16)[name = tensor<string, []>("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 1500]> obj_41_cast_fp16 = softmax(axis = var_492, x = mh_w_17_cast_fp16)[name = tensor<string, []>("obj_41_cast_fp16")];
+            tensor<int32, [4]> var_649 = const()[name = tensor<string, []>("op_649"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_650_cast_fp16 = reshape(shape = var_649, x = value_11_cast_fp16)[name = tensor<string, []>("op_650_cast_fp16")];
+            tensor<bool, []> attn_11_transpose_x_0 = const()[name = tensor<string, []>("attn_11_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_11_transpose_y_0 = const()[name = tensor<string, []>("attn_11_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_650_cast_fp16, y = obj_41_cast_fp16)[name = tensor<string, []>("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_653 = const()[name = tensor<string, []>("op_653"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_23_cast_fp16 = reshape(shape = var_653, x = attn_11_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<int32, [2]> var_657 = const()[name = tensor<string, []>("op_657"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_659 = const()[name = tensor<string, []>("op_659"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_39_pad_type_0 = const()[name = tensor<string, []>("obj_39_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_39_pad_0 = const()[name = tensor<string, []>("obj_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_2_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51716928)))];
+            tensor<fp16, [384]> layers_2_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52011904)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_39_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_bias_to_fp16, dilations = var_659, groups = var_499, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = var_657, weight = layers_2_encoder_attn_o_proj_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("obj_39_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, [1]> var_668 = const()[name = tensor<string, []>("op_668"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_17_cast_fp16 = reduce_mean(axes = var_668, keep_dims = var_500, x = inputs_17_cast_fp16)[name = tensor<string, []>("channels_mean_17_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_17_cast_fp16 = sub(x = inputs_17_cast_fp16, y = channels_mean_17_cast_fp16)[name = tensor<string, []>("zero_mean_17_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = zero_mean_17_cast_fp16)[name = tensor<string, []>("zero_mean_sq_17_cast_fp16")];
+            tensor<int32, [1]> var_672 = const()[name = tensor<string, []>("op_672"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_673_cast_fp16 = reduce_mean(axes = var_672, keep_dims = var_500, x = zero_mean_sq_17_cast_fp16)[name = tensor<string, []>("op_673_cast_fp16")];
+            tensor<fp16, []> var_674_to_fp16 = const()[name = tensor<string, []>("op_674_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_675_cast_fp16 = add(x = var_673_cast_fp16, y = var_674_to_fp16)[name = tensor<string, []>("op_675_cast_fp16")];
+            tensor<fp16, []> denom_17_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_17_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0_to_fp16, x = var_675_cast_fp16)[name = tensor<string, []>("denom_17_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = denom_17_cast_fp16)[name = tensor<string, []>("out_17_cast_fp16")];
+            tensor<fp16, [384]> input_25_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_25_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52012736)))];
+            tensor<fp16, [384]> input_25_beta_0_to_fp16 = const()[name = tensor<string, []>("input_25_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52013568)))];
+            tensor<fp16, []> input_25_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_25_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<int32, [2]> var_686 = const()[name = tensor<string, []>("op_686"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_688 = const()[name = tensor<string, []>("op_688"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_27_pad_type_0 = const()[name = tensor<string, []>("input_27_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_27_pad_0 = const()[name = tensor<string, []>("input_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1536, 384, 1, 1]> layers_2_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52014400)))];
+            tensor<fp16, [1536]> layers_2_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53194112)))];
+            tensor<fp16, [1, 1536, 1, 1]> input_27_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_688, groups = var_499, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = var_686, weight = layers_2_fc1_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_mode_0 = const()[name = tensor<string, []>("input_29_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<int32, [2]> var_694 = const()[name = tensor<string, []>("op_694"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_696 = const()[name = tensor<string, []>("op_696"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_7_pad_type_0 = const()[name = tensor<string, []>("hidden_states_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = tensor<string, []>("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 1536, 1, 1]> layers_2_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53197248)))];
+            tensor<fp16, [384]> layers_2_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54376960)))];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_7_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_696, groups = var_499, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_694, weight = layers_2_fc2_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, []> var_710 = const()[name = tensor<string, []>("op_710"), val = tensor<int32, []>(3)];
+            tensor<int32, []> var_717 = const()[name = tensor<string, []>("op_717"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_718 = const()[name = tensor<string, []>("op_718"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_730 = const()[name = tensor<string, []>("op_730"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_19_cast_fp16 = reduce_mean(axes = var_730, keep_dims = var_718, x = inputs_19_cast_fp16)[name = tensor<string, []>("channels_mean_19_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_19_cast_fp16 = sub(x = inputs_19_cast_fp16, y = channels_mean_19_cast_fp16)[name = tensor<string, []>("zero_mean_19_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = zero_mean_19_cast_fp16)[name = tensor<string, []>("zero_mean_sq_19_cast_fp16")];
+            tensor<int32, [1]> var_734 = const()[name = tensor<string, []>("op_734"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_735_cast_fp16 = reduce_mean(axes = var_734, keep_dims = var_718, x = zero_mean_sq_19_cast_fp16)[name = tensor<string, []>("op_735_cast_fp16")];
+            tensor<fp16, []> var_736_to_fp16 = const()[name = tensor<string, []>("op_736_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_737_cast_fp16 = add(x = var_735_cast_fp16, y = var_736_to_fp16)[name = tensor<string, []>("op_737_cast_fp16")];
+            tensor<fp16, []> denom_19_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_19_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0_to_fp16, x = var_737_cast_fp16)[name = tensor<string, []>("denom_19_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = denom_19_cast_fp16)[name = tensor<string, []>("out_19_cast_fp16")];
+            tensor<fp16, [384]> obj_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_43_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54377792)))];
+            tensor<fp16, [384]> obj_43_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_43_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54378624)))];
+            tensor<fp16, []> obj_43_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_43_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor<string, []>("obj_43_cast_fp16")];
+            tensor<int32, [2]> var_752 = const()[name = tensor<string, []>("op_752"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_754 = const()[name = tensor<string, []>("op_754"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_13_pad_type_0 = const()[name = tensor<string, []>("query_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_13_pad_0 = const()[name = tensor<string, []>("query_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54379456)))];
+            tensor<fp16, [384]> layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54674432)))];
+            tensor<fp16, [1, 384, 1, 1]> query_13_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_754, groups = var_717, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = var_752, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("query_13_cast_fp16")];
+            tensor<int32, [2]> var_758 = const()[name = tensor<string, []>("op_758"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_760 = const()[name = tensor<string, []>("op_760"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_key_pad_type_0 = const()[name = tensor<string, []>("current_key_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_key_pad_0 = const()[name = tensor<string, []>("current_key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54675264)))];
+            tensor<fp16, [1, 384, 1, 1]> current_key_cast_fp16 = conv(dilations = var_760, groups = var_717, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = var_758, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("current_key_cast_fp16")];
+            tensor<int32, [2]> var_765 = const()[name = tensor<string, []>("op_765"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_767 = const()[name = tensor<string, []>("op_767"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> current_value_pad_type_0 = const()[name = tensor<string, []>("current_value_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> current_value_pad_0 = const()[name = tensor<string, []>("current_value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54970240)))];
+            tensor<fp16, [384]> layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55265216)))];
+            tensor<fp16, [1, 384, 1, 1]> current_value_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_767, groups = var_717, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = var_765, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor<string, []>("current_value_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_774_cast_fp16 = mul(x = current_key_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_774_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_776_cast_fp16 = mul(x = var_47_cast_fp16_3, y = var_129_cast_fp16)[name = tensor<string, []>("op_776_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> key_13_cast_fp16 = add(x = var_774_cast_fp16, y = var_776_cast_fp16)[name = tensor<string, []>("key_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_778_cast_fp16 = mul(x = current_value_cast_fp16, y = var_126_cast_fp16)[name = tensor<string, []>("op_778_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> var_780_cast_fp16 = mul(x = var_54_cast_fp16_3, y = var_129_cast_fp16)[name = tensor<string, []>("op_780_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 224]> value_13_cast_fp16 = add(x = var_778_cast_fp16, y = var_780_cast_fp16)[name = tensor<string, []>("value_13_cast_fp16")];
+            tensor<int32, [4]> var_783 = const()[name = tensor<string, []>("op_783"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> var_784_cast_fp16 = reshape(shape = var_783, x = query_13_cast_fp16)[name = tensor<string, []>("op_784_cast_fp16")];
+            tensor<fp16, []> var_785_to_fp16 = const()[name = tensor<string, []>("op_785_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_786_cast_fp16 = mul(x = var_784_cast_fp16, y = var_785_to_fp16)[name = tensor<string, []>("op_786_cast_fp16")];
+            tensor<int32, [4]> var_787 = const()[name = tensor<string, []>("op_787"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 224]> var_788_cast_fp16 = reshape(shape = var_787, x = key_13_cast_fp16)[name = tensor<string, []>("op_788_cast_fp16")];
+            tensor<bool, []> mh_w_19_transpose_x_0 = const()[name = tensor<string, []>("mh_w_19_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_19_transpose_y_0 = const()[name = tensor<string, []>("mh_w_19_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 224]> mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_786_cast_fp16, y = var_788_cast_fp16)[name = tensor<string, []>("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 224]> mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 224]> var_796_cast_fp16 = softmax(axis = var_710, x = mh_w_21_cast_fp16)[name = tensor<string, []>("op_796_cast_fp16")];
+            tensor<int32, [4]> var_797 = const()[name = tensor<string, []>("op_797"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 224]> var_798_cast_fp16 = reshape(shape = var_797, x = value_13_cast_fp16)[name = tensor<string, []>("op_798_cast_fp16")];
+            tensor<bool, []> attn_13_transpose_x_0 = const()[name = tensor<string, []>("attn_13_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_13_transpose_y_0 = const()[name = tensor<string, []>("attn_13_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_798_cast_fp16, y = var_796_cast_fp16)[name = tensor<string, []>("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_801 = const()[name = tensor<string, []>("op_801"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_31_cast_fp16 = reshape(shape = var_801, x = attn_13_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<int32, [2]> var_805 = const()[name = tensor<string, []>("op_805"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_807 = const()[name = tensor<string, []>("op_807"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_49_pad_type_0 = const()[name = tensor<string, []>("obj_49_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_49_pad_0 = const()[name = tensor<string, []>("obj_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55266048)))];
+            tensor<fp16, [384]> layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55561024)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_49_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_807, groups = var_717, pad = obj_49_pad_0, pad_type = obj_49_pad_type_0, strides = var_805, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("obj_49_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, [1]> var_817 = const()[name = tensor<string, []>("op_817"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_21_cast_fp16 = reduce_mean(axes = var_817, keep_dims = var_718, x = inputs_21_cast_fp16)[name = tensor<string, []>("channels_mean_21_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_21_cast_fp16 = sub(x = inputs_21_cast_fp16, y = channels_mean_21_cast_fp16)[name = tensor<string, []>("zero_mean_21_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = zero_mean_21_cast_fp16)[name = tensor<string, []>("zero_mean_sq_21_cast_fp16")];
+            tensor<int32, [1]> var_821 = const()[name = tensor<string, []>("op_821"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_822_cast_fp16 = reduce_mean(axes = var_821, keep_dims = var_718, x = zero_mean_sq_21_cast_fp16)[name = tensor<string, []>("op_822_cast_fp16")];
+            tensor<fp16, []> var_823_to_fp16 = const()[name = tensor<string, []>("op_823_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_824_cast_fp16 = add(x = var_822_cast_fp16, y = var_823_to_fp16)[name = tensor<string, []>("op_824_cast_fp16")];
+            tensor<fp16, []> denom_21_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_21_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0_to_fp16, x = var_824_cast_fp16)[name = tensor<string, []>("denom_21_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = denom_21_cast_fp16)[name = tensor<string, []>("out_21_cast_fp16")];
+            tensor<fp16, [384]> obj_51_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_51_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55561856)))];
+            tensor<fp16, [384]> obj_51_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_51_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55562688)))];
+            tensor<fp16, []> obj_51_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_51_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor<string, []>("obj_51_cast_fp16")];
+            tensor<int32, [2]> var_839 = const()[name = tensor<string, []>("op_839"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_841 = const()[name = tensor<string, []>("op_841"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55563520)))];
+            tensor<fp16, [384]> layers_3_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55858496)))];
+            tensor<fp16, [1, 384, 1, 1]> query_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_bias_to_fp16, dilations = var_841, groups = var_717, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_839, weight = layers_3_encoder_attn_q_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
+            tensor<int32, [2]> var_845 = const()[name = tensor<string, []>("op_845"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_847 = const()[name = tensor<string, []>("op_847"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> key_pad_0 = const()[name = tensor<string, []>("key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55859328)))];
+            tensor<fp16, [1, 384, 1, 1500]> key_cast_fp16 = conv(dilations = var_847, groups = var_717, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_845, weight = layers_3_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_cast_fp16")];
+            tensor<int32, [2]> var_852 = const()[name = tensor<string, []>("op_852"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_854 = const()[name = tensor<string, []>("op_854"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> value_pad_type_0 = const()[name = tensor<string, []>("value_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> value_pad_0 = const()[name = tensor<string, []>("value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56154304)))];
+            tensor<fp16, [384]> layers_3_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56449280)))];
+            tensor<fp16, [1, 384, 1, 1500]> value_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_bias_to_fp16, dilations = var_854, groups = var_717, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_852, weight = layers_3_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_cast_fp16")];
+            tensor<int32, [4]> var_858 = const()[name = tensor<string, []>("op_858"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1]> var_859_cast_fp16 = reshape(shape = var_858, x = query_cast_fp16)[name = tensor<string, []>("op_859_cast_fp16")];
+            tensor<fp16, []> var_860_to_fp16 = const()[name = tensor<string, []>("op_860_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 6, 64, 1]> var_861_cast_fp16 = mul(x = var_859_cast_fp16, y = var_860_to_fp16)[name = tensor<string, []>("op_861_cast_fp16")];
+            tensor<int32, [4]> var_862 = const()[name = tensor<string, []>("op_862"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_863_cast_fp16 = reshape(shape = var_862, x = key_cast_fp16)[name = tensor<string, []>("op_863_cast_fp16")];
+            tensor<bool, []> mh_w_transpose_x_0 = const()[name = tensor<string, []>("mh_w_transpose_x_0"), val = tensor<bool, []>(true)];
+            tensor<bool, []> mh_w_transpose_y_0 = const()[name = tensor<string, []>("mh_w_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_861_cast_fp16, y = var_863_cast_fp16)[name = tensor<string, []>("mh_w_cast_fp16")];
+            tensor<fp16, [1, 6, 1, 1500]> obj_55_cast_fp16 = softmax(axis = var_710, x = mh_w_cast_fp16)[name = tensor<string, []>("obj_55_cast_fp16")];
+            tensor<int32, [4]> var_867 = const()[name = tensor<string, []>("op_867"), val = tensor<int32, [4]>([1, 6, 64, -1])];
+            tensor<fp16, [1, 6, 64, 1500]> var_868_cast_fp16 = reshape(shape = var_867, x = value_cast_fp16)[name = tensor<string, []>("op_868_cast_fp16")];
+            tensor<bool, []> attn_transpose_x_0 = const()[name = tensor<string, []>("attn_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> attn_transpose_y_0 = const()[name = tensor<string, []>("attn_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 6, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_868_cast_fp16, y = obj_55_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
+            tensor<int32, [4]> var_871 = const()[name = tensor<string, []>("op_871"), val = tensor<int32, [4]>([1, 384, 1, -1])];
+            tensor<fp16, [1, 384, 1, 1]> input_33_cast_fp16 = reshape(shape = var_871, x = attn_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<int32, [2]> var_875 = const()[name = tensor<string, []>("op_875"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_877 = const()[name = tensor<string, []>("op_877"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_53_pad_type_0 = const()[name = tensor<string, []>("obj_53_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_53_pad_0 = const()[name = tensor<string, []>("obj_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 384, 1, 1]> layers_3_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56450112)))];
+            tensor<fp16, [384]> layers_3_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56745088)))];
+            tensor<fp16, [1, 384, 1, 1]> obj_53_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_bias_to_fp16, dilations = var_877, groups = var_717, pad = obj_53_pad_0, pad_type = obj_53_pad_type_0, strides = var_875, weight = layers_3_encoder_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("obj_53_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> var_886 = const()[name = tensor<string, []>("op_886"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_23_cast_fp16 = reduce_mean(axes = var_886, keep_dims = var_718, x = inputs_23_cast_fp16)[name = tensor<string, []>("channels_mean_23_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_23_cast_fp16 = sub(x = inputs_23_cast_fp16, y = channels_mean_23_cast_fp16)[name = tensor<string, []>("zero_mean_23_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = zero_mean_23_cast_fp16)[name = tensor<string, []>("zero_mean_sq_23_cast_fp16")];
+            tensor<int32, [1]> var_890 = const()[name = tensor<string, []>("op_890"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_891_cast_fp16 = reduce_mean(axes = var_890, keep_dims = var_718, x = zero_mean_sq_23_cast_fp16)[name = tensor<string, []>("op_891_cast_fp16")];
+            tensor<fp16, []> var_892_to_fp16 = const()[name = tensor<string, []>("op_892_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_893_cast_fp16 = add(x = var_891_cast_fp16, y = var_892_to_fp16)[name = tensor<string, []>("op_893_cast_fp16")];
+            tensor<fp16, []> denom_23_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_23_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0_to_fp16, x = var_893_cast_fp16)[name = tensor<string, []>("denom_23_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = denom_23_cast_fp16)[name = tensor<string, []>("out_23_cast_fp16")];
+            tensor<fp16, [384]> input_35_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_35_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56745920)))];
+            tensor<fp16, [384]> input_35_beta_0_to_fp16 = const()[name = tensor<string, []>("input_35_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56746752)))];
+            tensor<fp16, []> input_35_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_35_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<int32, [2]> var_904 = const()[name = tensor<string, []>("op_904"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_906 = const()[name = tensor<string, []>("op_906"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> input_37_pad_type_0 = const()[name = tensor<string, []>("input_37_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> input_37_pad_0 = const()[name = tensor<string, []>("input_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1536, 384, 1, 1]> layers_3_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56747584)))];
+            tensor<fp16, [1536]> layers_3_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc1_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57927296)))];
+            tensor<fp16, [1, 1536, 1, 1]> input_37_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_906, groups = var_717, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = var_904, weight = layers_3_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_37_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [2]> var_912 = const()[name = tensor<string, []>("op_912"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_914 = const()[name = tensor<string, []>("op_914"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> hidden_states_9_pad_type_0 = const()[name = tensor<string, []>("hidden_states_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> hidden_states_9_pad_0 = const()[name = tensor<string, []>("hidden_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [384, 1536, 1, 1]> layers_3_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57930432)))];
+            tensor<fp16, [384]> layers_3_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_3_fc2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59110144)))];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_9_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_914, groups = var_717, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_912, weight = layers_3_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<bool, []> var_925 = const()[name = tensor<string, []>("op_925"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_929 = const()[name = tensor<string, []>("op_929"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_cast_fp16 = reduce_mean(axes = var_929, keep_dims = var_925, x = inputs_cast_fp16)[name = tensor<string, []>("channels_mean_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor<string, []>("zero_mean_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor<string, []>("zero_mean_sq_cast_fp16")];
+            tensor<int32, [1]> var_933 = const()[name = tensor<string, []>("op_933"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_934_cast_fp16 = reduce_mean(axes = var_933, keep_dims = var_925, x = zero_mean_sq_cast_fp16)[name = tensor<string, []>("op_934_cast_fp16")];
+            tensor<fp16, []> var_935_to_fp16 = const()[name = tensor<string, []>("op_935_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_936_cast_fp16 = add(x = var_934_cast_fp16, y = var_935_to_fp16)[name = tensor<string, []>("op_936_cast_fp16")];
+            tensor<fp16, []> denom_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_936_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1]> out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
+            tensor<fp16, [384]> hidden_states_gamma_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59110976)))];
+            tensor<fp16, [384]> hidden_states_beta_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59111808)))];
+            tensor<fp16, []> hidden_states_epsilon_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
+            tensor<int32, [1]> var_946_axes_0 = const()[name = tensor<string, []>("op_946_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 384, 1]> var_946_cast_fp16 = squeeze(axes = var_946_axes_0, x = hidden_states_cast_fp16)[name = tensor<string, []>("op_946_cast_fp16")];
+            tensor<int32, [3]> var_949_perm_0 = const()[name = tensor<string, []>("op_949_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [51865]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59112640)))];
+            tensor<fp16, [1, 1, 384]> transpose_0 = transpose(perm = var_949_perm_0, x = var_946_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp16, [1, 1, 51865]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = transpose_0)[name = tensor<string, []>("linear_0_cast_fp16")];
+            tensor<int32, []> var_953 = const()[name = tensor<string, []>("op_953"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_59_interleave_0 = const()[name = tensor<string, []>("obj_59_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1536, 1, 1]> key_cache_updates = concat(axis = var_953, interleave = obj_59_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_cast_fp16))[name = tensor<string, []>("obj_59_cast_fp16")];
+            tensor<int32, []> var_956 = const()[name = tensor<string, []>("op_956"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_61_interleave_0 = const()[name = tensor<string, []>("obj_61_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1536, 1, 1]> value_cache_updates = concat(axis = var_956, interleave = obj_61_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_cast_fp16))[name = tensor<string, []>("obj_61_cast_fp16")];
+            tensor<int32, [4]> var_967_begin_0 = const()[name = tensor<string, []>("op_967_begin_0"), val = tensor<int32, [4]>([0, 2, 0, 0])];
+            tensor<int32, [4]> var_967_end_0 = const()[name = tensor<string, []>("op_967_end_0"), val = tensor<int32, [4]>([1, 3, 1, 1500])];
+            tensor<bool, [4]> var_967_end_mask_0 = const()[name = tensor<string, []>("op_967_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_967_cast_fp16 = slice_by_index(begin = var_967_begin_0, end = var_967_end_0, end_mask = var_967_end_mask_0, x = obj_41_cast_fp16)[name = tensor<string, []>("op_967_cast_fp16")];
+            tensor<int32, [4]> var_970_begin_0 = const()[name = tensor<string, []>("op_970_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_970_end_0 = const()[name = tensor<string, []>("op_970_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_970_end_mask_0 = const()[name = tensor<string, []>("op_970_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_970_squeeze_mask_0 = const()[name = tensor<string, []>("op_970_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_970_cast_fp16 = slice_by_index(begin = var_970_begin_0, end = var_970_end_0, end_mask = var_970_end_mask_0, squeeze_mask = var_970_squeeze_mask_0, x = var_967_cast_fp16)[name = tensor<string, []>("op_970_cast_fp16")];
+            tensor<int32, [4]> var_985_begin_0 = const()[name = tensor<string, []>("op_985_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_985_end_0 = const()[name = tensor<string, []>("op_985_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_985_end_mask_0 = const()[name = tensor<string, []>("op_985_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_985_cast_fp16 = slice_by_index(begin = var_985_begin_0, end = var_985_end_0, end_mask = var_985_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_985_cast_fp16")];
+            tensor<int32, [4]> var_988_begin_0 = const()[name = tensor<string, []>("op_988_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_988_end_0 = const()[name = tensor<string, []>("op_988_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_988_end_mask_0 = const()[name = tensor<string, []>("op_988_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_988_squeeze_mask_0 = const()[name = tensor<string, []>("op_988_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, squeeze_mask = var_988_squeeze_mask_0, x = var_985_cast_fp16)[name = tensor<string, []>("op_988_cast_fp16")];
+            tensor<int32, [4]> var_1003_begin_0 = const()[name = tensor<string, []>("op_1003_begin_0"), val = tensor<int32, [4]>([0, 2, 0, 0])];
+            tensor<int32, [4]> var_1003_end_0 = const()[name = tensor<string, []>("op_1003_end_0"), val = tensor<int32, [4]>([1, 3, 1, 1500])];
+            tensor<bool, [4]> var_1003_end_mask_0 = const()[name = tensor<string, []>("op_1003_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1003_cast_fp16 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1003_cast_fp16")];
+            tensor<int32, [4]> var_1006_begin_0 = const()[name = tensor<string, []>("op_1006_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1006_end_0 = const()[name = tensor<string, []>("op_1006_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1006_end_mask_0 = const()[name = tensor<string, []>("op_1006_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1006_squeeze_mask_0 = const()[name = tensor<string, []>("op_1006_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1006_cast_fp16 = slice_by_index(begin = var_1006_begin_0, end = var_1006_end_0, end_mask = var_1006_end_mask_0, squeeze_mask = var_1006_squeeze_mask_0, x = var_1003_cast_fp16)[name = tensor<string, []>("op_1006_cast_fp16")];
+            tensor<int32, [4]> var_1021_begin_0 = const()[name = tensor<string, []>("op_1021_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_1021_end_0 = const()[name = tensor<string, []>("op_1021_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1500])];
+            tensor<bool, [4]> var_1021_end_mask_0 = const()[name = tensor<string, []>("op_1021_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1021_cast_fp16 = slice_by_index(begin = var_1021_begin_0, end = var_1021_end_0, end_mask = var_1021_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1021_cast_fp16")];
+            tensor<int32, [4]> var_1024_begin_0 = const()[name = tensor<string, []>("op_1024_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1024_end_0 = const()[name = tensor<string, []>("op_1024_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1024_end_mask_0 = const()[name = tensor<string, []>("op_1024_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1024_squeeze_mask_0 = const()[name = tensor<string, []>("op_1024_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1024_cast_fp16 = slice_by_index(begin = var_1024_begin_0, end = var_1024_end_0, end_mask = var_1024_end_mask_0, squeeze_mask = var_1024_squeeze_mask_0, x = var_1021_cast_fp16)[name = tensor<string, []>("op_1024_cast_fp16")];
+            tensor<int32, [4]> var_1039_begin_0 = const()[name = tensor<string, []>("op_1039_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_1039_end_0 = const()[name = tensor<string, []>("op_1039_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1500])];
+            tensor<bool, [4]> var_1039_end_mask_0 = const()[name = tensor<string, []>("op_1039_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1039_cast_fp16 = slice_by_index(begin = var_1039_begin_0, end = var_1039_end_0, end_mask = var_1039_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1039_cast_fp16")];
+            tensor<int32, [4]> var_1042_begin_0 = const()[name = tensor<string, []>("op_1042_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1042_end_0 = const()[name = tensor<string, []>("op_1042_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1042_end_mask_0 = const()[name = tensor<string, []>("op_1042_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1042_squeeze_mask_0 = const()[name = tensor<string, []>("op_1042_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, squeeze_mask = var_1042_squeeze_mask_0, x = var_1039_cast_fp16)[name = tensor<string, []>("op_1042_cast_fp16")];
+            tensor<int32, [4]> var_1057_begin_0 = const()[name = tensor<string, []>("op_1057_begin_0"), val = tensor<int32, [4]>([0, 5, 0, 0])];
+            tensor<int32, [4]> var_1057_end_0 = const()[name = tensor<string, []>("op_1057_end_0"), val = tensor<int32, [4]>([1, 6, 1, 1500])];
+            tensor<bool, [4]> var_1057_end_mask_0 = const()[name = tensor<string, []>("op_1057_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_1057_cast_fp16 = slice_by_index(begin = var_1057_begin_0, end = var_1057_end_0, end_mask = var_1057_end_mask_0, x = obj_55_cast_fp16)[name = tensor<string, []>("op_1057_cast_fp16")];
+            tensor<int32, [4]> var_1060_begin_0 = const()[name = tensor<string, []>("op_1060_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1060_end_0 = const()[name = tensor<string, []>("op_1060_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_1060_end_mask_0 = const()[name = tensor<string, []>("op_1060_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1060_squeeze_mask_0 = const()[name = tensor<string, []>("op_1060_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_1060_cast_fp16 = slice_by_index(begin = var_1060_begin_0, end = var_1060_end_0, end_mask = var_1060_end_mask_0, squeeze_mask = var_1060_squeeze_mask_0, x = var_1057_cast_fp16)[name = tensor<string, []>("op_1060_cast_fp16")];
+            tensor<int32, []> var_1067 = const()[name = tensor<string, []>("op_1067"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_1068_interleave_0 = const()[name = tensor<string, []>("op_1068_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 6, 1500]> var_1068_cast_fp16 = concat(axis = var_1067, interleave = var_1068_interleave_0, values = (var_970_cast_fp16, var_988_cast_fp16, var_1006_cast_fp16, var_1024_cast_fp16, var_1042_cast_fp16, var_1060_cast_fp16))[name = tensor<string, []>("op_1068_cast_fp16")];
+            tensor<int32, [1]> var_1070 = const()[name = tensor<string, []>("op_1070"), val = tensor<int32, [1]>([1])];
+            tensor<bool, []> var_1071 = const()[name = tensor<string, []>("op_1071"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1500]> alignment_heads_weights = reduce_mean(axes = var_1070, keep_dims = var_1071, x = var_1068_cast_fp16)[name = tensor<string, []>("obj_cast_fp16")];
+        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
+}
\ No newline at end of file
diff --git a/openai_whisper-tiny/TextDecoder.mlmodelc/model.mlmodel b/openai_whisper-tiny/TextDecoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..b1125838f2db9d18d70c5c297609654e9ec15b3c
--- /dev/null
+++ b/openai_whisper-tiny/TextDecoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1afdfc3a8f3e8d6afc46e1ecc5fb216eadccbf82d9c568e7dbd3955143a1cd0e
+size 113134
diff --git a/openai_whisper-tiny/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-tiny/TextDecoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3dcac898a051ba89f37f4a1bc2f6ede2d7ad213b
--- /dev/null
+++ b/openai_whisper-tiny/TextDecoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0313e1a4ffa88538c141cc3c73e6eb0e3dc54db9d574b21c7c034de688e4951
+size 59216434
diff --git a/openai_whisper-tiny/config.json b/openai_whisper-tiny/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..543fc73de943d608a1370ac9ae7916bdda2e76bc
--- /dev/null
+++ b/openai_whisper-tiny/config.json
@@ -0,0 +1 @@
+{"_name_or_path": "openai/whisper-tiny", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "d_model": 384, "decoder_attention_heads": 6, "decoder_ffn_dim": 1536, "decoder_layerdrop": 0.0, "decoder_layers": 4, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 6, "encoder_ffn_dim": 1536, "encoder_layerdrop": 0.0, "encoder_layers": 4, "eos_token_id": 50257, "forced_decoder_ids": [[1, 50259], [2, 50359], [3, 50363]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": "whisper", "num_hidden_layers": 4, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51865}
\ No newline at end of file
diff --git a/openai_whisper-tiny/generation_config.json b/openai_whisper-tiny/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d347402dfedd5e005cfab2688cb1b92fc971c7f
--- /dev/null
+++ b/openai_whisper-tiny/generation_config.json
@@ -0,0 +1 @@
+{"alignment_heads": [[2, 2], [3, 0], [3, 2], [3, 3], [3, 4], [3, 5]], "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "decoder_start_token_id": 50258, "eos_token_id": 50257, "forced_decoder_ids": [[1, null], [2, 50359]], "is_multilingual": true, "lang_to_id": {"<|af|>": 50327, "<|am|>": 50334, "<|ar|>": 50272, "<|as|>": 50350, "<|az|>": 50304, "<|ba|>": 50355, "<|be|>": 50330, "<|bg|>": 50292, "<|bn|>": 50302, "<|bo|>": 50347, "<|br|>": 50309, "<|bs|>": 50315, "<|ca|>": 50270, "<|cs|>": 50283, "<|cy|>": 50297, "<|da|>": 50285, "<|de|>": 50261, "<|el|>": 50281, "<|en|>": 50259, "<|es|>": 50262, "<|et|>": 50307, "<|eu|>": 50310, "<|fa|>": 50300, "<|fi|>": 50277, "<|fo|>": 50338, "<|fr|>": 50265, "<|gl|>": 50319, "<|gu|>": 50333, "<|haw|>": 50352, "<|ha|>": 50354, "<|he|>": 50279, "<|hi|>": 50276, "<|hr|>": 50291, "<|ht|>": 50339, "<|hu|>": 50286, "<|hy|>": 50312, "<|id|>": 50275, "<|is|>": 50311, "<|it|>": 50274, "<|ja|>": 50266, "<|jw|>": 50356, "<|ka|>": 50329, "<|kk|>": 50316, "<|km|>": 50323, "<|kn|>": 50306, "<|ko|>": 50264, "<|la|>": 50294, "<|lb|>": 50345, "<|ln|>": 50353, "<|lo|>": 50336, "<|lt|>": 50293, "<|lv|>": 50301, "<|mg|>": 50349, "<|mi|>": 50295, "<|mk|>": 50308, "<|ml|>": 50296, "<|mn|>": 50314, "<|mr|>": 50320, "<|ms|>": 50282, "<|mt|>": 50343, "<|my|>": 50346, "<|ne|>": 50313, "<|nl|>": 50271, "<|nn|>": 50342, "<|no|>": 50288, "<|oc|>": 50328, "<|pa|>": 50321, "<|pl|>": 50269, "<|ps|>": 50340, "<|pt|>": 50267, "<|ro|>": 50284, "<|ru|>": 50263, "<|sa|>": 50344, "<|sd|>": 50332, "<|si|>": 50322, "<|sk|>": 50298, "<|sl|>": 50305, "<|sn|>": 50324, "<|so|>": 50326, "<|sq|>": 50317, "<|sr|>": 50303, "<|su|>": 50357, "<|sv|>": 50273, "<|sw|>": 50318, "<|ta|>": 50287, "<|te|>": 50299, "<|tg|>": 50331, "<|th|>": 50289, "<|tk|>": 50341, "<|tl|>": 50348, "<|tr|>": 50268, "<|tt|>": 50351, "<|uk|>": 50280, "<|ur|>": 50290, "<|uz|>": 50337, "<|vi|>": 50278, "<|yi|>": 50335, "<|yo|>": 50325, "<|zh|>": 50260}, "max_initial_timestamp_index": 50, "max_length": 448, "no_timestamps_token_id": 50363, "pad_token_id": 50257, "prev_sot_token_id": 50361, "return_timestamps": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50358, 50359, 50360, 50361, 50362], "task_to_id": {"transcribe": 50359, "translate": 50358}, "transformers_version": "4.31.0.dev0"}
\ No newline at end of file