diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin index 795642f5d9e77353f486f59d448752d2e2b316a2..1bc3db5f787e9d3cb34063d8fbd93f821a26b7a7 100644 --- a/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2184893e48a9da76b01012a32cca3e2ebfd4080553daa78318fe2391679dd7fe +oid sha256:75aec049f180c798ac703b21d39421b0cb60122e24f20933de1e3729242356ac size 243 diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin index 5cd381ac5c32ecf671fe48f419b6af48cf1f229e..1c7c25ba0d607c3495ce3d4f47f73701d546744b 100644 --- a/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8dbc016e9274c2a01d9eddb55dfd163a8ae74e7e97f0932268602c1a8b14903c -size 407 +oid sha256:8eea6be9d5db2d6fafee48085cec7ae6575566482e1f313ab8eaf35b75c0fbdf +size 409 diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json index 6913dc8ae5dea890292ef4531ac575b2245175b9..968b56eba1885e1ae29e3be4637b2ebc8cf97e86 100644 --- a/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" } @@ -52,9 +52,10 @@ "Select" : 2, "Tile" : 2, "Ios16.sub" : 3, - "Transpose" : 1, + "Transpose" : 2, "Ios16.gather" : 3, - "ExpandDims" : 4, + "ExpandDims" : 3, + "Ios16.reshape" : 1, "Ios16.maximum" : 1, "Ios16.less" : 2 }, @@ -74,7 +75,7 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { @@ -98,7 +99,7 @@ "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk1", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk1", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/model.mil b/Llama-2-7b-hf_chunk1.mlmodelc/model.mil index 4613d6c3ac483995c10803555b436a36a44176ae..9ed7f2c9fe1d6a5166d897e2e881e9aad196c0b9 100644 --- a/Llama-2-7b-hf_chunk1.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk1.mlmodelc/model.mil @@ -1,48 +1,50 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { func main(tensor full_sequence_length, tensor input_ids) { tensor T = const()[name = tensor("T"), val = tensor([64])]; - tensor x_axis_0 = const()[name = tensor("x_axis_0"), val = tensor(0)]; - tensor x_batch_dims_0 = const()[name = tensor("x_batch_dims_0"), val = tensor(0)]; + tensor x_1_axis_0 = const()[name = tensor("x_1_axis_0"), val = tensor(0)]; + tensor x_1_batch_dims_0 = const()[name = tensor("x_1_batch_dims_0"), val = tensor(0)]; tensor wte_weight_to_fp16 = const()[name = tensor("wte_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; - tensor x_cast_fp16 = gather(axis = x_axis_0, batch_dims = x_batch_dims_0, indices = input_ids, x = wte_weight_to_fp16)[name = tensor("x_cast_fp16")]; - tensor var_16_perm_0 = const()[name = tensor("op_16_perm_0"), val = tensor([0, 2, 1])]; - tensor var_18_axes_0 = const()[name = tensor("op_18_axes_0"), val = tensor([2])]; - tensor transpose_0 = transpose(perm = var_16_perm_0, x = x_cast_fp16)[name = tensor("transpose_0")]; - tensor x = expand_dims(axes = var_18_axes_0, x = transpose_0)[name = tensor("op_18_cast_fp16")]; + tensor x_1_cast_fp16 = gather(axis = x_1_axis_0, batch_dims = x_1_batch_dims_0, indices = input_ids, x = wte_weight_to_fp16)[name = tensor("x_1_cast_fp16")]; + tensor x_perm_0 = const()[name = tensor("x_perm_0"), val = tensor([0, 2, 1])]; + tensor var_27 = const()[name = tensor("op_27"), val = tensor([1, 4096, -1, 8])]; + tensor x_cast_fp16 = transpose(perm = x_perm_0, x = x_1_cast_fp16)[name = tensor("transpose_1")]; + tensor x = reshape(shape = var_27, x = x_cast_fp16)[name = tensor("op_28_cast_fp16")]; tensor pos_offset = sub(x = T, y = full_sequence_length)[name = tensor("pos_offset")]; - tensor var_26 = const()[name = tensor("op_26"), val = tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63])]; - tensor input_pos_1 = sub(x = var_26, y = pos_offset)[name = tensor("input_pos_1")]; - tensor var_34 = const()[name = tensor("op_34"), val = tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]; - tensor input_pos = maximum(x = input_pos_1, y = var_34)[name = tensor("input_pos")]; - tensor var_45 = const()[name = tensor("op_45"), val = tensor(1)]; - tensor var_46_batch_dims_0 = const()[name = tensor("op_46_batch_dims_0"), val = tensor(0)]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262144128)))]; - tensor cos = gather(axis = var_45, batch_dims = var_46_batch_dims_0, indices = input_pos, x = var_44_to_fp16)[name = tensor("op_46_cast_fp16")]; - tensor var_56 = const()[name = tensor("op_56"), val = tensor(1)]; - tensor var_57_batch_dims_0 = const()[name = tensor("op_57_batch_dims_0"), val = tensor(0)]; - tensor var_55_to_fp16 = const()[name = tensor("op_55_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262275264)))]; - tensor sin = gather(axis = var_56, batch_dims = var_57_batch_dims_0, indices = input_pos, x = var_55_to_fp16)[name = tensor("op_57_cast_fp16")]; - tensor var_92 = const()[name = tensor("op_92"), val = tensor([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63]])]; - tensor var_95 = less(x = var_92, y = pos_offset)[name = tensor("op_95")]; - tensor var_95_after_broadcast_reps_0 = const()[name = tensor("op_95_after_broadcast_reps_0"), val = tensor([1, 512])]; - tensor var_95_after_broadcast = tile(reps = var_95_after_broadcast_reps_0, x = var_95)[name = tensor("op_95_after_broadcast")]; + tensor var_36 = const()[name = tensor("op_36"), val = tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63])]; + tensor input_pos_1 = sub(x = var_36, y = pos_offset)[name = tensor("input_pos_1")]; + tensor var_44 = const()[name = tensor("op_44"), val = tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]; + tensor input_pos = maximum(x = input_pos_1, y = var_44)[name = tensor("input_pos")]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(1)]; + tensor var_56_batch_dims_0 = const()[name = tensor("op_56_batch_dims_0"), val = tensor(0)]; + tensor var_54_to_fp16 = const()[name = tensor("op_54_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262144128)))]; + tensor cos = gather(axis = var_55, batch_dims = var_56_batch_dims_0, indices = input_pos, x = var_54_to_fp16)[name = tensor("op_56_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor(1)]; + tensor var_67_batch_dims_0 = const()[name = tensor("op_67_batch_dims_0"), val = tensor(0)]; + tensor var_65_to_fp16 = const()[name = tensor("op_65_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262275264)))]; + tensor sin = gather(axis = var_66, batch_dims = var_67_batch_dims_0, indices = input_pos, x = var_65_to_fp16)[name = tensor("op_67_cast_fp16")]; + tensor var_102 = const()[name = tensor("op_102"), val = tensor([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63]])]; + tensor var_105 = less(x = var_102, y = pos_offset)[name = tensor("op_105")]; + tensor var_105_after_broadcast_reps_0 = const()[name = tensor("op_105_after_broadcast_reps_0"), val = tensor([1, 512])]; + tensor var_105_after_broadcast = tile(reps = var_105_after_broadcast_reps_0, x = var_105)[name = tensor("op_105_after_broadcast")]; tensor all_mask_to_fp16 = const()[name = tensor("all_mask_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262406400)))]; tensor m_1_to_fp16 = const()[name = tensor("m_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262472000)))]; - tensor m_3_cast_fp16 = select(a = all_mask_to_fp16, b = m_1_to_fp16, cond = var_95_after_broadcast)[name = tensor("m_3_cast_fp16")]; - tensor var_105 = const()[name = tensor("op_105"), val = tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511])]; - tensor var_106 = const()[name = tensor("op_106"), val = tensor(512)]; - tensor var_108 = sub(x = var_106, y = full_sequence_length)[name = tensor("op_108")]; - tensor var_109 = less(x = var_105, y = var_108)[name = tensor("op_109")]; + tensor m_3_cast_fp16 = select(a = all_mask_to_fp16, b = m_1_to_fp16, cond = var_105_after_broadcast)[name = tensor("m_3_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511])]; + tensor var_116 = const()[name = tensor("op_116"), val = tensor(512)]; + tensor var_118 = sub(x = var_116, y = full_sequence_length)[name = tensor("op_118")]; + tensor var_119 = less(x = var_115, y = var_118)[name = tensor("op_119")]; tensor expand_dims_0_axes_0 = const()[name = tensor("expand_dims_0_axes_0"), val = tensor([0])]; - tensor expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = var_109)[name = tensor("expand_dims_0")]; - tensor var_109_after_broadcast_reps_0 = const()[name = tensor("op_109_after_broadcast_reps_0"), val = tensor([64, 1])]; - tensor var_109_after_broadcast = tile(reps = var_109_after_broadcast_reps_0, x = expand_dims_0)[name = tensor("op_109_after_broadcast")]; - tensor m_cast_fp16 = select(a = all_mask_to_fp16, b = m_3_cast_fp16, cond = var_109_after_broadcast)[name = tensor("m_cast_fp16")]; - tensor var_112_axes_0 = const()[name = tensor("op_112_axes_0"), val = tensor([0])]; - tensor var_112_cast_fp16 = expand_dims(axes = var_112_axes_0, x = m_cast_fp16)[name = tensor("op_112_cast_fp16")]; - tensor var_114_axes_0 = const()[name = tensor("op_114_axes_0"), val = tensor([0])]; - tensor mask = expand_dims(axes = var_114_axes_0, x = var_112_cast_fp16)[name = tensor("op_114_cast_fp16")]; + tensor expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = var_119)[name = tensor("expand_dims_0")]; + tensor var_119_after_broadcast_reps_0 = const()[name = tensor("op_119_after_broadcast_reps_0"), val = tensor([64, 1])]; + tensor var_119_after_broadcast = tile(reps = var_119_after_broadcast_reps_0, x = expand_dims_0)[name = tensor("op_119_after_broadcast")]; + tensor m_cast_fp16 = select(a = all_mask_to_fp16, b = m_3_cast_fp16, cond = var_119_after_broadcast)[name = tensor("m_cast_fp16")]; + tensor var_122_axes_0 = const()[name = tensor("op_122_axes_0"), val = tensor([0])]; + tensor var_122_cast_fp16 = expand_dims(axes = var_122_axes_0, x = m_cast_fp16)[name = tensor("op_122_cast_fp16")]; + tensor var_124_axes_0 = const()[name = tensor("op_124_axes_0"), val = tensor([0])]; + tensor var_124_cast_fp16 = expand_dims(axes = var_124_axes_0, x = var_122_cast_fp16)[name = tensor("op_124_cast_fp16")]; + tensor var_129 = const()[name = tensor("op_129"), val = tensor([0, 3, 1, 2])]; + tensor mask = transpose(perm = var_129, x = var_124_cast_fp16)[name = tensor("transpose_0")]; } -> (x, cos, sin, mask); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin index 4fe6a2bd0bcbbf09db93eddc06361f9091faeef3..c7405e4c736d61adcb482e4a054aef454201f9a8 100644 --- a/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75a8ba0e4d6fc824f820051588b446e6b72dfb09497a058e443ab071d9b3cbc7 +oid sha256:098ec04b28e4314c7c3e9a3bf0f5ee4e7e9211d49ed367953c5d0fbc0f36d13c size 262537600 diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin index 6a8e1fcd6e9aac86c476bdfef211aba9441a747c..6f8fd64bce0d223b711086f7c1798691439f0bc5 100644 --- a/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b79e263bb20b8a02d650dad2c3eee71ff787829f337aedacb6cd4e1b61c1ce23 -size 791 +oid sha256:3fdd5ca1ab176b28ed33e53920cb3ef99dac8b0e220af01536a3969d5d83f1a5 +size 793 diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json index 3c26103bafb4738c6fd5b51b37c02a73f2b4b983..a240801b8e13cb2152cb7676d64d8590cc9a4d33 100644 --- a/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk10", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk10", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/model.mil b/Llama-2-7b-hf_chunk10.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk10.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk10.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk10.mlmodelc/weights/weight.bin index ef3438de9426f16ce0e72e62a2db5afd5a560998..b2d56851cf8d3f54f7366beeeedf44ff1fd464fe 100644 --- a/Llama-2-7b-hf_chunk10.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk10.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86d4446017950797cf7896941f17c78be0e7c925911e4555f70b1133d20f77b9 -size 303872704 +oid sha256:d316f3ae860ed5659a53ae6c81a8e8a352eaa73244c08f3fdbbfe4fcdd8f1be2 +size 303873856 diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin index e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347..6f8fd64bce0d223b711086f7c1798691439f0bc5 100644 --- a/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e -size 791 +oid sha256:3fdd5ca1ab176b28ed33e53920cb3ef99dac8b0e220af01536a3969d5d83f1a5 +size 793 diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json index 13c34f866bf7312ea4a562467c8931a51d7d9932..dc581bad2d5e909439e876a3f95a668e916e4362 100644 --- a/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk11", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk11", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/model.mil b/Llama-2-7b-hf_chunk11.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk11.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk11.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk11.mlmodelc/weights/weight.bin index 96a7b41de6393e0867ae5246888049739e50f0da..6152547871f9b6ad680018f5b6a3f9693f173f6b 100644 --- a/Llama-2-7b-hf_chunk11.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk11.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9506f3438a1c857418b2dd28a4631b401f24e3bd606f0427c7adbf510af1e2dc -size 303872704 +oid sha256:baecb2f13f8f1adf620bb882d56cf3b6adbc1fe32d9d07645fb8bdfe81e55d9d +size 303873856 diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin index 94b21e541586d2c345445cad639367e7c6a0a244..f1edde10be24e9867f7cc276846226d9c4f47f68 100644 --- a/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a73e9cc1e9aaa1351af7ee9af6a10c0d8fd805fe2383635cee1714240351b5c2 +oid sha256:7e2c7d4f699871cef135bb77d11029f37679666a0b22b160dcd45a6ef9be60c9 size 243 diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin index 93a5d37e577f7cb83d1e9df8e195e00cc041ac40..0e55ffe9ca5d535627fb0f0b0111d9f79882115b 100644 --- a/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e030d81f910b53587cf130f1dba0c1d731ab715ebd6ca0b4f475da21707b21e -size 651 +oid sha256:8148a192361b8ecec063ca1450af657313ff478d464c1737f0a57502021593a7 +size 653 diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json index 00a650ba3e3350963ebd9abfbfa8401cc8be692c..3a81a6d67093e7b4ae7a1fa56bdd32fe8955ed5d 100644 --- a/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" } @@ -59,17 +59,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 12, - "Ios16.rsqrt" : 4, - "Ios16.mul" : 42, - "SliceByIndex" : 8, + "Concat" : 14, + "Ios16.mul" : 100, + "SliceByIndex" : 200, "Ios16.constexprLutToDense" : 14, + "Transpose" : 2, + "Ios16.einsum" : 128, "Ios16.conv" : 14, - "Ios16.add" : 14, - "Ios16.reduceMean" : 4, - "Ios16.matmul" : 4, - "Ios16.softmax" : 2, - "Ios16.reshape" : 8, + "Ios16.add" : 72, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 64, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, "Ios16.silu" : 2 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -87,17 +88,17 @@ }, "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", - "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1", + "com.github.apple.coremltools.source" : "torch==2.1.0" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -125,9 +126,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -135,9 +136,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -165,14 +166,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk12", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk12", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/model.mil b/Llama-2-7b-hf_chunk12.mlmodelc/model.mil index 8de6335625c13a25c5109a209d6bcfe39538e22b..1f6dd646621643f077aafa69b3df0c9faf5a7b16 100644 --- a/Llama-2-7b-hf_chunk12.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk12.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -16,273 +16,1545 @@ program(1.0) tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_14 = const()[name = tensor("op_14"), val = tensor(3)]; - tensor var_19 = const()[name = tensor("op_19"), val = tensor(-2)]; - tensor var_21 = const()[name = tensor("op_21"), val = tensor(-1)]; - tensor var_28 = const()[name = tensor("op_28"), val = tensor(1)]; - tensor var_29 = const()[name = tensor("op_29"), val = tensor(true)]; - tensor var_37_cast_fp16 = mul(x = x, y = x)[name = tensor("op_37_cast_fp16")]; - tensor var_38 = const()[name = tensor("op_38"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_38, keep_dims = var_29, x = var_37_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_40_to_fp16 = const()[name = tensor("op_40_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_41_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_40_to_fp16)[name = tensor("op_41_cast_fp16")]; - tensor var_42_epsilon_0_to_fp16 = const()[name = tensor("op_42_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_42_cast_fp16 = rsqrt(epsilon = var_42_epsilon_0_to_fp16, x = var_41_cast_fp16)[name = tensor("op_42_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_42_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_54 = const()[name = tensor("op_54"), val = tensor([1, 1])]; - tensor var_56 = const()[name = tensor("op_56"), val = tensor([1, 1])]; - tensor var_58_pad_type_0 = const()[name = tensor("op_58_pad_type_0"), val = tensor("custom")]; - tensor var_58_pad_0 = const()[name = tensor("op_58_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_58_cast_fp16 = conv(dilations = var_56, groups = var_28, pad = var_58_pad_0, pad_type = var_58_pad_type_0, strides = var_54, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_58_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202386176)))]; - tensor q_1_cast_fp16 = mul(x = var_58_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_62 = const()[name = tensor("op_62"), val = tensor([1, 1])]; - tensor var_64 = const()[name = tensor("op_64"), val = tensor([1, 1])]; - tensor var_66_pad_type_0 = const()[name = tensor("op_66_pad_type_0"), val = tensor("custom")]; - tensor var_66_pad_0 = const()[name = tensor("op_66_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_66_cast_fp16 = conv(dilations = var_64, groups = var_28, pad = var_66_pad_0, pad_type = var_66_pad_type_0, strides = var_62, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_66_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202394432)))]; - tensor k_1_cast_fp16 = mul(x = var_66_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_70 = const()[name = tensor("op_70"), val = tensor([1, 1])]; - tensor var_72 = const()[name = tensor("op_72"), val = tensor([1, 1])]; - tensor var_74_pad_type_0 = const()[name = tensor("op_74_pad_type_0"), val = tensor("custom")]; - tensor var_74_pad_0 = const()[name = tensor("op_74_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_74_cast_fp16 = conv(dilations = var_72, groups = var_28, pad = var_74_pad_0, pad_type = var_74_pad_type_0, strides = var_70, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_74_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202402688)))]; - tensor v_1_cast_fp16 = mul(x = var_74_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_76, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_78 = const()[name = tensor("op_78"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_78, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_80, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_92_begin_0 = const()[name = tensor("op_92_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_92_end_0 = const()[name = tensor("op_92_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_92_end_mask_0 = const()[name = tensor("op_92_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_92_cast_fp16 = slice_by_index(begin = var_92_begin_0, end = var_92_end_0, end_mask = var_92_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_92_cast_fp16")]; - tensor var_98_begin_0 = const()[name = tensor("op_98_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_98_end_0 = const()[name = tensor("op_98_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_98_end_mask_0 = const()[name = tensor("op_98_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_98_cast_fp16 = slice_by_index(begin = var_98_begin_0, end = var_98_end_0, end_mask = var_98_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_98_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_100_cast_fp16 = mul(x = var_98_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_100_cast_fp16")]; + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor(1)]; + tensor var_63 = const()[name = tensor("op_63"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_60, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_63, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_87_to_fp16 = const()[name = tensor("op_87_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_87_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202378112)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_109, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 1])]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117_pad_type_0 = const()[name = tensor("op_117_pad_type_0"), val = tensor("custom")]; + tensor var_117_pad_0 = const()[name = tensor("op_117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_117_cast_fp16 = conv(dilations = var_115, groups = var_60, pad = var_117_pad_0, pad_type = var_117_pad_type_0, strides = var_113, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_117_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202386368)))]; + tensor q_1_cast_fp16 = mul(x = var_117_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_121 = const()[name = tensor("op_121"), val = tensor([1, 1])]; + tensor var_123 = const()[name = tensor("op_123"), val = tensor([1, 1])]; + tensor var_125_pad_type_0 = const()[name = tensor("op_125_pad_type_0"), val = tensor("custom")]; + tensor var_125_pad_0 = const()[name = tensor("op_125_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_125_cast_fp16 = conv(dilations = var_123, groups = var_60, pad = var_125_pad_0, pad_type = var_125_pad_type_0, strides = var_121, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_125_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202394624)))]; + tensor k_1_cast_fp16 = mul(x = var_125_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_129 = const()[name = tensor("op_129"), val = tensor([1, 1])]; + tensor var_131 = const()[name = tensor("op_131"), val = tensor([1, 1])]; + tensor var_133_pad_type_0 = const()[name = tensor("op_133_pad_type_0"), val = tensor("custom")]; + tensor var_133_pad_0 = const()[name = tensor("op_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_133_cast_fp16 = conv(dilations = var_131, groups = var_60, pad = var_133_pad_0, pad_type = var_133_pad_type_0, strides = var_129, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_133_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202402880)))]; + tensor v_1_cast_fp16 = mul(x = var_133_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_135, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_137 = const()[name = tensor("op_137"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_137, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_151_begin_0 = const()[name = tensor("op_151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_151_end_0 = const()[name = tensor("op_151_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_151_end_mask_0 = const()[name = tensor("op_151_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_151_cast_fp16 = slice_by_index(begin = var_151_begin_0, end = var_151_end_0, end_mask = var_151_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_151_cast_fp16")]; + tensor var_157_begin_0 = const()[name = tensor("op_157_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_157_end_0 = const()[name = tensor("op_157_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_157_end_mask_0 = const()[name = tensor("op_157_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_157_cast_fp16 = slice_by_index(begin = var_157_begin_0, end = var_157_end_0, end_mask = var_157_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_157_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_159_cast_fp16 = mul(x = var_157_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_159_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_19, interleave = rotated_1_interleave_0, values = (var_100_cast_fp16, var_92_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_103_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_103_cast_fp16")]; - tensor var_104_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_104_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_103_cast_fp16, y = var_104_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_117_begin_0 = const()[name = tensor("op_117_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_117_end_0 = const()[name = tensor("op_117_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_117_end_mask_0 = const()[name = tensor("op_117_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_117_cast_fp16 = slice_by_index(begin = var_117_begin_0, end = var_117_end_0, end_mask = var_117_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_117_cast_fp16")]; - tensor var_123_begin_0 = const()[name = tensor("op_123_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_123_end_0 = const()[name = tensor("op_123_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_123_end_mask_0 = const()[name = tensor("op_123_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_123_cast_fp16 = slice_by_index(begin = var_123_begin_0, end = var_123_end_0, end_mask = var_123_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_123_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_125_cast_fp16 = mul(x = var_123_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_125_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_159_cast_fp16, var_151_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_162_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_162_cast_fp16")]; + tensor var_163_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_163_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_162_cast_fp16, y = var_163_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_176_begin_0 = const()[name = tensor("op_176_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_end_0 = const()[name = tensor("op_176_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_176_end_mask_0 = const()[name = tensor("op_176_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_176_cast_fp16 = slice_by_index(begin = var_176_begin_0, end = var_176_end_0, end_mask = var_176_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor var_182_begin_0 = const()[name = tensor("op_182_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_182_end_0 = const()[name = tensor("op_182_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_182_end_mask_0 = const()[name = tensor("op_182_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_182_cast_fp16 = slice_by_index(begin = var_182_begin_0, end = var_182_end_0, end_mask = var_182_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_182_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_184_cast_fp16 = mul(x = var_182_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_184_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_19, interleave = rotated_3_interleave_0, values = (var_125_cast_fp16, var_117_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_128_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_128_cast_fp16")]; - tensor var_129_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_129_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_128_cast_fp16, y = var_129_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_19, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_19, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_21, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_21, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_151_to_fp16 = const()[name = tensor("op_151_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_152_cast_fp16 = mul(x = q_5_cast_fp16, y = var_151_to_fp16)[name = tensor("op_152_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_152_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_160_cast_fp16 = softmax(axis = var_14, x = attn_weights_3_cast_fp16)[name = tensor("op_160_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_160_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_164 = const()[name = tensor("op_164"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_164, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 1])]; - tensor var_170 = const()[name = tensor("op_170"), val = tensor([1, 1])]; - tensor var_172_pad_type_0 = const()[name = tensor("op_172_pad_type_0"), val = tensor("custom")]; - tensor var_172_pad_0 = const()[name = tensor("op_172_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_172_cast_fp16 = conv(dilations = var_170, groups = var_28, pad = var_172_pad_0, pad_type = var_172_pad_type_0, strides = var_168, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_172_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202410944)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_172_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_181_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_181_cast_fp16")]; - tensor var_182 = const()[name = tensor("op_182"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_182, keep_dims = var_29, x = var_181_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_184_to_fp16 = const()[name = tensor("op_184_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_185_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_184_to_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186_epsilon_0_to_fp16 = const()[name = tensor("op_186_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_186_cast_fp16 = rsqrt(epsilon = var_186_epsilon_0_to_fp16, x = var_185_cast_fp16)[name = tensor("op_186_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_186_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202419200)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 1])]; - tensor var_200 = const()[name = tensor("op_200"), val = tensor([1, 1])]; - tensor var_202_pad_type_0 = const()[name = tensor("op_202_pad_type_0"), val = tensor("custom")]; - tensor var_202_pad_0 = const()[name = tensor("op_202_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_202_cast_fp16 = conv(dilations = var_200, groups = var_28, pad = var_202_pad_0, pad_type = var_202_pad_type_0, strides = var_198, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_202_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202427456)))]; - tensor input_5_cast_fp16 = mul(x = var_202_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_206 = const()[name = tensor("op_206"), val = tensor([1, 1])]; - tensor var_208 = const()[name = tensor("op_208"), val = tensor([1, 1])]; - tensor var_210_pad_type_0 = const()[name = tensor("op_210_pad_type_0"), val = tensor("custom")]; - tensor var_210_pad_0 = const()[name = tensor("op_210_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_210_cast_fp16 = conv(dilations = var_208, groups = var_28, pad = var_210_pad_0, pad_type = var_210_pad_type_0, strides = var_206, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_210_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202449536)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_210_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_212_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_212_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_212_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_216 = const()[name = tensor("op_216"), val = tensor([1, 1])]; - tensor var_218 = const()[name = tensor("op_218"), val = tensor([1, 1])]; - tensor var_220_pad_type_0 = const()[name = tensor("op_220_pad_type_0"), val = tensor("custom")]; - tensor var_220_pad_0 = const()[name = tensor("op_220_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_220_cast_fp16 = conv(dilations = var_218, groups = var_28, pad = var_220_pad_0, pad_type = var_220_pad_type_0, strides = var_216, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_220_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202471616)))]; - tensor var_221_cast_fp16 = mul(x = var_220_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_221_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_221_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_228 = const()[name = tensor("op_228"), val = tensor(3)]; - tensor var_233 = const()[name = tensor("op_233"), val = tensor(-2)]; - tensor var_235 = const()[name = tensor("op_235"), val = tensor(-1)]; - tensor var_242 = const()[name = tensor("op_242"), val = tensor(1)]; - tensor var_243 = const()[name = tensor("op_243"), val = tensor(true)]; - tensor var_250_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_250_cast_fp16")]; - tensor var_251 = const()[name = tensor("op_251"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_251, keep_dims = var_243, x = var_250_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_253_to_fp16 = const()[name = tensor("op_253_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_254_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_253_to_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255_epsilon_0_to_fp16 = const()[name = tensor("op_255_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_255_cast_fp16 = rsqrt(epsilon = var_255_epsilon_0_to_fp16, x = var_254_cast_fp16)[name = tensor("op_255_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_255_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202479872)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_270 = const()[name = tensor("op_270"), val = tensor([1, 1])]; - tensor var_272 = const()[name = tensor("op_272"), val = tensor([1, 1])]; - tensor var_274_pad_type_0 = const()[name = tensor("op_274_pad_type_0"), val = tensor("custom")]; - tensor var_274_pad_0 = const()[name = tensor("op_274_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_274_cast_fp16 = conv(dilations = var_272, groups = var_242, pad = var_274_pad_0, pad_type = var_274_pad_type_0, strides = var_270, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_274_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202488128)))]; - tensor q_7_cast_fp16 = mul(x = var_274_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_278 = const()[name = tensor("op_278"), val = tensor([1, 1])]; - tensor var_280 = const()[name = tensor("op_280"), val = tensor([1, 1])]; - tensor var_282_pad_type_0 = const()[name = tensor("op_282_pad_type_0"), val = tensor("custom")]; - tensor var_282_pad_0 = const()[name = tensor("op_282_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_282_cast_fp16 = conv(dilations = var_280, groups = var_242, pad = var_282_pad_0, pad_type = var_282_pad_type_0, strides = var_278, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_282_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202496384)))]; - tensor k_9_cast_fp16 = mul(x = var_282_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_286 = const()[name = tensor("op_286"), val = tensor([1, 1])]; - tensor var_288 = const()[name = tensor("op_288"), val = tensor([1, 1])]; - tensor var_290_pad_type_0 = const()[name = tensor("op_290_pad_type_0"), val = tensor("custom")]; - tensor var_290_pad_0 = const()[name = tensor("op_290_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_290_cast_fp16 = conv(dilations = var_288, groups = var_242, pad = var_290_pad_0, pad_type = var_290_pad_type_0, strides = var_286, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_290_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202504640)))]; - tensor v_7_cast_fp16 = mul(x = var_290_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_292, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_294 = const()[name = tensor("op_294"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_294, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_296, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_308_begin_0 = const()[name = tensor("op_308_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_308_end_0 = const()[name = tensor("op_308_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_308_end_mask_0 = const()[name = tensor("op_308_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_308_cast_fp16")]; - tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_314_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_316_cast_fp16 = mul(x = var_314_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_316_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_184_cast_fp16, var_176_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_187_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_187_cast_fp16")]; + tensor var_188_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_188_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_187_cast_fp16, y = var_188_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_191 = const()[name = tensor("op_191"), val = tensor([1, 4096, 1, 64])]; + tensor var_192_cast_fp16 = reshape(shape = var_191, x = roped_3_cast_fp16)[name = tensor("op_192_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_194 = const()[name = tensor("op_194"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_194, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_192_cast_fp16)[name = tensor("transpose_1")]; + tensor k_9_cast_fp16 = concat(axis = var_19, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_201 = const()[name = tensor("op_201"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_201, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_206_begin_0 = const()[name = tensor("op_206_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_end_0 = const()[name = tensor("op_206_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_206_end_mask_0 = const()[name = tensor("op_206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_206_cast_fp16 = slice_by_index(begin = var_206_begin_0, end = var_206_end_0, end_mask = var_206_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_336_begin_0 = const()[name = tensor("op_336_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_336_end_0 = const()[name = tensor("op_336_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_336_end_mask_0 = const()[name = tensor("op_336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = var_336_end_0, end_mask = var_336_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_336_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_462_begin_0 = const()[name = tensor("op_462_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_462_end_0 = const()[name = tensor("op_462_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_462_end_mask_0 = const()[name = tensor("op_462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_462_cast_fp16 = slice_by_index(begin = var_462_begin_0, end = var_462_end_0, end_mask = var_462_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_462_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_equation_0 = const()[name = tensor("op_590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_590_cast_fp16 = einsum(equation = var_590_equation_0, values = (var_336_cast_fp16, var_206_cast_fp16))[name = tensor("op_590_cast_fp16")]; + tensor var_591_to_fp16 = const()[name = tensor("op_591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_592_cast_fp16 = mul(x = var_590_cast_fp16, y = var_591_to_fp16)[name = tensor("op_592_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_592_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_749_cast_fp16 = softmax(axis = var_60, x = aw_1_cast_fp16)[name = tensor("op_749_cast_fp16")]; + tensor var_750_cast_fp16 = softmax(axis = var_60, x = aw_3_cast_fp16)[name = tensor("op_750_cast_fp16")]; + tensor var_751_cast_fp16 = softmax(axis = var_60, x = aw_5_cast_fp16)[name = tensor("op_751_cast_fp16")]; + tensor var_752_cast_fp16 = softmax(axis = var_60, x = aw_7_cast_fp16)[name = tensor("op_752_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_60, x = aw_9_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_60, x = aw_11_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_60, x = aw_13_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_60, x = aw_15_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_60, x = aw_17_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_60, x = aw_19_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_60, x = aw_21_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_60, x = aw_23_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_60, x = aw_25_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_60, x = aw_27_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_60, x = aw_29_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_60, x = aw_31_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_60, x = aw_33_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_60, x = aw_35_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_60, x = aw_37_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_60, x = aw_39_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_60, x = aw_41_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_60, x = aw_43_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_60, x = aw_45_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_60, x = aw_47_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_60, x = aw_49_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_60, x = aw_51_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_60, x = aw_53_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_60, x = aw_55_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_60, x = aw_57_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_60, x = aw_59_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_60, x = aw_61_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_60, x = aw_63_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_782_equation_0 = const()[name = tensor("op_782_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_782_cast_fp16 = einsum(equation = var_782_equation_0, values = (var_462_cast_fp16, var_749_cast_fp16))[name = tensor("op_782_cast_fp16")]; + tensor var_784_equation_0 = const()[name = tensor("op_784_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_784_cast_fp16 = einsum(equation = var_784_equation_0, values = (var_466_cast_fp16, var_750_cast_fp16))[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_470_cast_fp16, var_751_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_474_cast_fp16, var_752_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_478_cast_fp16, var_753_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_482_cast_fp16, var_754_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_486_cast_fp16, var_755_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_490_cast_fp16, var_756_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_494_cast_fp16, var_757_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_498_cast_fp16, var_758_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_502_cast_fp16, var_759_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_506_cast_fp16, var_760_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_510_cast_fp16, var_761_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_514_cast_fp16, var_762_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_518_cast_fp16, var_763_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_522_cast_fp16, var_764_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_526_cast_fp16, var_765_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_530_cast_fp16, var_766_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_534_cast_fp16, var_767_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_538_cast_fp16, var_768_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_542_cast_fp16, var_769_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_546_cast_fp16, var_770_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_550_cast_fp16, var_771_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_554_cast_fp16, var_772_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_558_cast_fp16, var_773_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_562_cast_fp16, var_774_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_566_cast_fp16, var_775_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_570_cast_fp16, var_776_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_574_cast_fp16, var_777_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_578_cast_fp16, var_778_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_582_cast_fp16, var_779_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_586_cast_fp16, var_780_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_60, interleave = x_11_interleave_0, values = (var_782_cast_fp16, var_784_cast_fp16, var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_849, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 1])]; + tensor var_855 = const()[name = tensor("op_855"), val = tensor([1, 1])]; + tensor var_857_pad_type_0 = const()[name = tensor("op_857_pad_type_0"), val = tensor("custom")]; + tensor var_857_pad_0 = const()[name = tensor("op_857_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_857_cast_fp16 = conv(dilations = var_855, groups = var_60, pad = var_857_pad_0, pad_type = var_857_pad_type_0, strides = var_853, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_857_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202411136)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_857_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202419392)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_60, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_63, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_882_to_fp16 = const()[name = tensor("op_882_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_882_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202419584)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_894 = const()[name = tensor("op_894"), val = tensor([1, 1])]; + tensor var_896 = const()[name = tensor("op_896"), val = tensor([1, 1])]; + tensor var_898_pad_type_0 = const()[name = tensor("op_898_pad_type_0"), val = tensor("custom")]; + tensor var_898_pad_0 = const()[name = tensor("op_898_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_898_cast_fp16 = conv(dilations = var_896, groups = var_60, pad = var_898_pad_0, pad_type = var_898_pad_type_0, strides = var_894, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_898_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202427840)))]; + tensor input_7_cast_fp16 = mul(x = var_898_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_902 = const()[name = tensor("op_902"), val = tensor([1, 1])]; + tensor var_904 = const()[name = tensor("op_904"), val = tensor([1, 1])]; + tensor var_906_pad_type_0 = const()[name = tensor("op_906_pad_type_0"), val = tensor("custom")]; + tensor var_906_pad_0 = const()[name = tensor("op_906_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_906_cast_fp16 = conv(dilations = var_904, groups = var_60, pad = var_906_pad_0, pad_type = var_906_pad_type_0, strides = var_902, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_906_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202449920)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_906_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_908_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_908_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_908_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_912 = const()[name = tensor("op_912"), val = tensor([1, 1])]; + tensor var_914 = const()[name = tensor("op_914"), val = tensor([1, 1])]; + tensor var_916_pad_type_0 = const()[name = tensor("op_916_pad_type_0"), val = tensor("custom")]; + tensor var_916_pad_0 = const()[name = tensor("op_916_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_916_cast_fp16 = conv(dilations = var_914, groups = var_60, pad = var_916_pad_0, pad_type = var_916_pad_type_0, strides = var_912, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_916_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202472000)))]; + tensor var_917_cast_fp16 = mul(x = var_916_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_917_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_917_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_923 = const()[name = tensor("op_923"), val = tensor(-1)]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-2)]; + tensor var_929 = const()[name = tensor("op_929"), val = tensor(-3)]; + tensor var_970 = const()[name = tensor("op_970"), val = tensor(1)]; + tensor var_973 = const()[name = tensor("op_973"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202480256)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_970, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_973, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_996_to_fp16 = const()[name = tensor("op_996_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_996_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202480448)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1021 = const()[name = tensor("op_1021"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1021, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 1])]; + tensor var_1027 = const()[name = tensor("op_1027"), val = tensor([1, 1])]; + tensor var_1029_pad_type_0 = const()[name = tensor("op_1029_pad_type_0"), val = tensor("custom")]; + tensor var_1029_pad_0 = const()[name = tensor("op_1029_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1029_cast_fp16 = conv(dilations = var_1027, groups = var_970, pad = var_1029_pad_0, pad_type = var_1029_pad_type_0, strides = var_1025, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1029_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202488704)))]; + tensor q_9_cast_fp16 = mul(x = var_1029_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1033 = const()[name = tensor("op_1033"), val = tensor([1, 1])]; + tensor var_1035 = const()[name = tensor("op_1035"), val = tensor([1, 1])]; + tensor var_1037_pad_type_0 = const()[name = tensor("op_1037_pad_type_0"), val = tensor("custom")]; + tensor var_1037_pad_0 = const()[name = tensor("op_1037_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1037_cast_fp16 = conv(dilations = var_1035, groups = var_970, pad = var_1037_pad_0, pad_type = var_1037_pad_type_0, strides = var_1033, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1037_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202496960)))]; + tensor k_11_cast_fp16 = mul(x = var_1037_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1041 = const()[name = tensor("op_1041"), val = tensor([1, 1])]; + tensor var_1043 = const()[name = tensor("op_1043"), val = tensor([1, 1])]; + tensor var_1045_pad_type_0 = const()[name = tensor("op_1045_pad_type_0"), val = tensor("custom")]; + tensor var_1045_pad_0 = const()[name = tensor("op_1045_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1045_cast_fp16 = conv(dilations = var_1043, groups = var_970, pad = var_1045_pad_0, pad_type = var_1045_pad_type_0, strides = var_1041, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1045_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202505216)))]; + tensor v_11_cast_fp16 = mul(x = var_1045_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1047, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1049 = const()[name = tensor("op_1049"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1049, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1063_begin_0 = const()[name = tensor("op_1063_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1063_end_0 = const()[name = tensor("op_1063_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1063_end_mask_0 = const()[name = tensor("op_1063_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1063_cast_fp16 = slice_by_index(begin = var_1063_begin_0, end = var_1063_end_0, end_mask = var_1063_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1063_cast_fp16")]; + tensor var_1069_begin_0 = const()[name = tensor("op_1069_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1069_end_0 = const()[name = tensor("op_1069_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1069_end_mask_0 = const()[name = tensor("op_1069_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1069_cast_fp16 = slice_by_index(begin = var_1069_begin_0, end = var_1069_end_0, end_mask = var_1069_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1069_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1071_cast_fp16 = mul(x = var_1069_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1071_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_233, interleave = rotated_5_interleave_0, values = (var_316_cast_fp16, var_308_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_319_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_319_cast_fp16")]; - tensor var_320_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_320_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_319_cast_fp16, y = var_320_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_333_begin_0 = const()[name = tensor("op_333_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_333_end_0 = const()[name = tensor("op_333_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_333_end_mask_0 = const()[name = tensor("op_333_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = var_333_end_0, end_mask = var_333_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_333_cast_fp16")]; - tensor var_339_begin_0 = const()[name = tensor("op_339_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_339_end_0 = const()[name = tensor("op_339_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_339_end_mask_0 = const()[name = tensor("op_339_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_339_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_341_cast_fp16 = mul(x = var_339_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_341_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_927, interleave = rotated_5_interleave_0, values = (var_1071_cast_fp16, var_1063_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1074_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1074_cast_fp16")]; + tensor var_1075_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1075_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1074_cast_fp16, y = var_1075_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1088_begin_0 = const()[name = tensor("op_1088_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1088_end_0 = const()[name = tensor("op_1088_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1088_end_mask_0 = const()[name = tensor("op_1088_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1088_cast_fp16 = slice_by_index(begin = var_1088_begin_0, end = var_1088_end_0, end_mask = var_1088_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1088_cast_fp16")]; + tensor var_1094_begin_0 = const()[name = tensor("op_1094_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1094_end_0 = const()[name = tensor("op_1094_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1094_end_mask_0 = const()[name = tensor("op_1094_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1094_cast_fp16 = slice_by_index(begin = var_1094_begin_0, end = var_1094_end_0, end_mask = var_1094_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1094_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1096_cast_fp16 = mul(x = var_1094_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1096_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_233, interleave = rotated_interleave_0, values = (var_341_cast_fp16, var_333_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_344_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_344_cast_fp16")]; - tensor var_345_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_345_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_344_cast_fp16, y = var_345_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_233, interleave = q_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_233, interleave = k_13_interleave_0, values = roped_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_927, interleave = rotated_interleave_0, values = (var_1096_cast_fp16, var_1088_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_1099_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1099_cast_fp16")]; + tensor var_1100_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_1100_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_1099_cast_fp16, y = var_1100_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_1103 = const()[name = tensor("op_1103"), val = tensor([1, 4096, 1, 64])]; + tensor var_1104_cast_fp16 = reshape(shape = var_1103, x = roped_cast_fp16)[name = tensor("op_1104_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1106 = const()[name = tensor("op_1106"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1106, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_235, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_235, interleave = v_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_cast_fp16")]; - tensor var_367_to_fp16 = const()[name = tensor("op_367_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_368_cast_fp16 = mul(x = q_cast_fp16, y = var_367_to_fp16)[name = tensor("op_368_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_368_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_376_cast_fp16 = softmax(axis = var_228, x = attn_weights_cast_fp16)[name = tensor("op_376_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_cast_fp16, y = var_376_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_380 = const()[name = tensor("op_380"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_380, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 1])]; - tensor var_386 = const()[name = tensor("op_386"), val = tensor([1, 1])]; - tensor var_388_pad_type_0 = const()[name = tensor("op_388_pad_type_0"), val = tensor("custom")]; - tensor var_388_pad_0 = const()[name = tensor("op_388_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_388_cast_fp16 = conv(dilations = var_386, groups = var_242, pad = var_388_pad_0, pad_type = var_388_pad_type_0, strides = var_384, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202512896)))]; - tensor attention_output_cast_fp16 = mul(x = var_388_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_397_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_397_cast_fp16")]; - tensor var_398 = const()[name = tensor("op_398"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_398, keep_dims = var_243, x = var_397_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_400_to_fp16 = const()[name = tensor("op_400_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_401_cast_fp16 = add(x = norm_x_cast_fp16, y = var_400_to_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402_epsilon_0_to_fp16 = const()[name = tensor("op_402_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_402_cast_fp16 = rsqrt(epsilon = var_402_epsilon_0_to_fp16, x = var_401_cast_fp16)[name = tensor("op_402_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_402_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202521152)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_414 = const()[name = tensor("op_414"), val = tensor([1, 1])]; - tensor var_416 = const()[name = tensor("op_416"), val = tensor([1, 1])]; - tensor var_418_pad_type_0 = const()[name = tensor("op_418_pad_type_0"), val = tensor("custom")]; - tensor var_418_pad_0 = const()[name = tensor("op_418_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_418_cast_fp16 = conv(dilations = var_416, groups = var_242, pad = var_418_pad_0, pad_type = var_418_pad_type_0, strides = var_414, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_418_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202529408)))]; - tensor input_13_cast_fp16 = mul(x = var_418_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_422 = const()[name = tensor("op_422"), val = tensor([1, 1])]; - tensor var_424 = const()[name = tensor("op_424"), val = tensor([1, 1])]; - tensor var_426_pad_type_0 = const()[name = tensor("op_426_pad_type_0"), val = tensor("custom")]; - tensor var_426_pad_0 = const()[name = tensor("op_426_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_426_cast_fp16 = conv(dilations = var_424, groups = var_242, pad = var_426_pad_0, pad_type = var_426_pad_type_0, strides = var_422, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_426_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202551488)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_426_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_428_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_428_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_428_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_432 = const()[name = tensor("op_432"), val = tensor([1, 1])]; - tensor var_434 = const()[name = tensor("op_434"), val = tensor([1, 1])]; - tensor var_436_pad_type_0 = const()[name = tensor("op_436_pad_type_0"), val = tensor("custom")]; - tensor var_436_pad_0 = const()[name = tensor("op_436_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_436_cast_fp16 = conv(dilations = var_434, groups = var_242, pad = var_436_pad_0, pad_type = var_436_pad_type_0, strides = var_432, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_436_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202573568)))]; - tensor var_437_cast_fp16 = mul(x = var_436_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_437_cast_fp16")]; - tensor new_x = add(x = var_437_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_438_cast_fp16")]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1104_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_929, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_923, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1113 = const()[name = tensor("op_1113"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_1113, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_1118_begin_0 = const()[name = tensor("op_1118_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1118_end_0 = const()[name = tensor("op_1118_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1118_end_mask_0 = const()[name = tensor("op_1118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1118_cast_fp16 = slice_by_index(begin = var_1118_begin_0, end = var_1118_end_0, end_mask = var_1118_end_mask_0, x = q_cast_fp16)[name = tensor("op_1118_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1248_begin_0 = const()[name = tensor("op_1248_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1248_end_0 = const()[name = tensor("op_1248_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1248_end_mask_0 = const()[name = tensor("op_1248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1248_cast_fp16 = slice_by_index(begin = var_1248_begin_0, end = var_1248_end_0, end_mask = var_1248_end_mask_0, x = k_cast_fp16)[name = tensor("op_1248_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_begin_0 = const()[name = tensor("op_1374_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1374_end_0 = const()[name = tensor("op_1374_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1374_end_mask_0 = const()[name = tensor("op_1374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1374_cast_fp16 = slice_by_index(begin = var_1374_begin_0, end = var_1374_end_0, end_mask = var_1374_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1374_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_equation_0 = const()[name = tensor("op_1502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1502_cast_fp16 = einsum(equation = var_1502_equation_0, values = (var_1248_cast_fp16, var_1118_cast_fp16))[name = tensor("op_1502_cast_fp16")]; + tensor var_1503_to_fp16 = const()[name = tensor("op_1503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1504_cast_fp16 = mul(x = var_1502_cast_fp16, y = var_1503_to_fp16)[name = tensor("op_1504_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1504_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1661_cast_fp16 = softmax(axis = var_970, x = aw_65_cast_fp16)[name = tensor("op_1661_cast_fp16")]; + tensor var_1662_cast_fp16 = softmax(axis = var_970, x = aw_67_cast_fp16)[name = tensor("op_1662_cast_fp16")]; + tensor var_1663_cast_fp16 = softmax(axis = var_970, x = aw_69_cast_fp16)[name = tensor("op_1663_cast_fp16")]; + tensor var_1664_cast_fp16 = softmax(axis = var_970, x = aw_71_cast_fp16)[name = tensor("op_1664_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_970, x = aw_73_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_970, x = aw_75_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_970, x = aw_77_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_970, x = aw_79_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_970, x = aw_81_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_970, x = aw_83_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_970, x = aw_85_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_970, x = aw_87_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_970, x = aw_89_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_970, x = aw_91_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_970, x = aw_93_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_970, x = aw_95_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_970, x = aw_97_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_970, x = aw_99_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_970, x = aw_101_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_970, x = aw_103_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_970, x = aw_105_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_970, x = aw_107_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_970, x = aw_109_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_970, x = aw_111_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_970, x = aw_113_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_970, x = aw_115_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_970, x = aw_117_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_970, x = aw_119_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_970, x = aw_121_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_970, x = aw_123_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_970, x = aw_125_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_970, x = aw_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1694_equation_0 = const()[name = tensor("op_1694_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1694_cast_fp16 = einsum(equation = var_1694_equation_0, values = (var_1374_cast_fp16, var_1661_cast_fp16))[name = tensor("op_1694_cast_fp16")]; + tensor var_1696_equation_0 = const()[name = tensor("op_1696_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1696_cast_fp16 = einsum(equation = var_1696_equation_0, values = (var_1378_cast_fp16, var_1662_cast_fp16))[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1382_cast_fp16, var_1663_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1386_cast_fp16, var_1664_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1390_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1394_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1398_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1402_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1406_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1410_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1414_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1418_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1422_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1426_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1430_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1434_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1438_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1442_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1446_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1450_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1454_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1458_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1462_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1466_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1470_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1474_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1478_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1482_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1486_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1490_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1494_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1498_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_970, interleave = x_27_interleave_0, values = (var_1694_cast_fp16, var_1696_cast_fp16, var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1761 = const()[name = tensor("op_1761"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1761, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 1])]; + tensor var_1767 = const()[name = tensor("op_1767"), val = tensor([1, 1])]; + tensor var_1769_pad_type_0 = const()[name = tensor("op_1769_pad_type_0"), val = tensor("custom")]; + tensor var_1769_pad_0 = const()[name = tensor("op_1769_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1769_cast_fp16 = conv(dilations = var_1767, groups = var_970, pad = var_1769_pad_0, pad_type = var_1769_pad_type_0, strides = var_1765, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1769_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202513472)))]; + tensor attention_output_cast_fp16 = mul(x = var_1769_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202521728)))]; + tensor x_eps_cast_fp16 = concat(axis = var_970, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_973, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1794_to_fp16 = const()[name = tensor("op_1794_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1794_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202521920)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1806 = const()[name = tensor("op_1806"), val = tensor([1, 1])]; + tensor var_1808 = const()[name = tensor("op_1808"), val = tensor([1, 1])]; + tensor var_1810_pad_type_0 = const()[name = tensor("op_1810_pad_type_0"), val = tensor("custom")]; + tensor var_1810_pad_0 = const()[name = tensor("op_1810_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1810_cast_fp16 = conv(dilations = var_1808, groups = var_970, pad = var_1810_pad_0, pad_type = var_1810_pad_type_0, strides = var_1806, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1810_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202530176)))]; + tensor input_17_cast_fp16 = mul(x = var_1810_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1814 = const()[name = tensor("op_1814"), val = tensor([1, 1])]; + tensor var_1816 = const()[name = tensor("op_1816"), val = tensor([1, 1])]; + tensor var_1818_pad_type_0 = const()[name = tensor("op_1818_pad_type_0"), val = tensor("custom")]; + tensor var_1818_pad_0 = const()[name = tensor("op_1818_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1818_cast_fp16 = conv(dilations = var_1816, groups = var_970, pad = var_1818_pad_0, pad_type = var_1818_pad_type_0, strides = var_1814, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1818_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202552256)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_1818_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1820_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1820_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1820_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1824 = const()[name = tensor("op_1824"), val = tensor([1, 1])]; + tensor var_1826 = const()[name = tensor("op_1826"), val = tensor([1, 1])]; + tensor var_1828_pad_type_0 = const()[name = tensor("op_1828_pad_type_0"), val = tensor("custom")]; + tensor var_1828_pad_0 = const()[name = tensor("op_1828_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1828_cast_fp16 = conv(dilations = var_1826, groups = var_970, pad = var_1828_pad_0, pad_type = var_1828_pad_type_0, strides = var_1824, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_1828_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202574336)))]; + tensor var_1829_cast_fp16 = mul(x = var_1828_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1829_cast_fp16")]; + tensor new_x = add(x = var_1829_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1830_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_v_cache_0, new_v_cache_1); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk12.mlmodelc/weights/weight.bin index eccea461663c91bc20e74a994f2b883fcb80fcf2..cea89b2b66454c9dede31acfd9dc30fd55b5779c 100644 --- a/Llama-2-7b-hf_chunk12.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk12.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3d085d837454b4685bcd36331b09a5b0b329f7ef4da1f2dbed101b7ec075630 -size 202581824 +oid sha256:8083e76257cb1c212c04f16e970bef147d1f06e4ccdbbe8d9c5f0ffebeed6d34 +size 202582592 diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk13.mlmodelc/analytics/coremldata.bin index c2d908c082131d02333987a46e271e2ecaea4168..5b989af19640e54d9184f4cb166bade4ab4b8425 100644 --- a/Llama-2-7b-hf_chunk13.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk13.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55bbf17f4d2567d045baa3ae69337cad81c45f822491151ed7a5b29327f874f6 +oid sha256:3fe22d6d09bcc30eac1d0d0e786aff8bd12a35552070af919e88c7c9bcac0405 size 243 diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk13.mlmodelc/coremldata.bin index 3cb69fe22262b38b05d6e777c572388e81f07f9d..a409491a6bdaa4a41734bdbad51a67c3653c017e 100644 --- a/Llama-2-7b-hf_chunk13.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk13.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d860ea43d6f8ebbf70594a29be6231ee1d324bdaf2f26417eb82297acb920e17 -size 309 +oid sha256:dcd130ddb6d42e6468c1c9ebc7cb2472a42812a6e04950ef5cf75498e5885fcc +size 311 diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk13.mlmodelc/metadata.json index 063ca7460a27e8e6592a5e49cf088fa80e79c39d..b64b5d3bfe5e42bc68df8f63b19f621375491497 100644 --- a/Llama-2-7b-hf_chunk13.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk13.mlmodelc/metadata.json @@ -19,15 +19,14 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 1, - "Ios16.add" : 1, - "Ios16.mul" : 3, - "Ios16.rsqrt" : 1, + "Concat" : 2, + "Ios16.mul" : 2, + "Squeeze" : 1, "Transpose" : 1, - "Ios16.reshape" : 3, - "Ios16.reduceMean" : 1, + "Ios16.reshape" : 4, "Ios16.matmul" : 2, - "Squeeze" : 1 + "Ios16.realDiv" : 1, + "Ios16.reduceL2Norm" : 1 }, "computePrecision" : "Mixed (Float16, Int32)", "isUpdatable" : "0", @@ -45,21 +44,21 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk13", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk13", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/model.mil b/Llama-2-7b-hf_chunk13.mlmodelc/model.mil index bc8aeb22f04ed716b1900a2605fb171673aef783..1372bfb6c11406d8872cc2f617d5387dd71eebb9 100644 --- a/Llama-2-7b-hf_chunk13.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk13.mlmodelc/model.mil @@ -1,38 +1,41 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor x) { + func main(tensor x) { tensor var_6 = const()[name = tensor("op_6"), val = tensor(true)]; - tensor var_13_cast_fp16 = mul(x = x, y = x)[name = tensor("op_13_cast_fp16")]; - tensor var_14 = const()[name = tensor("op_14"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_14, keep_dims = var_6, x = var_13_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_16_to_fp16 = const()[name = tensor("op_16_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_17_cast_fp16 = add(x = norm_x_cast_fp16, y = var_16_to_fp16)[name = tensor("op_17_cast_fp16")]; - tensor var_18_epsilon_0_to_fp16 = const()[name = tensor("op_18_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_18_cast_fp16 = rsqrt(epsilon = var_18_epsilon_0_to_fp16, x = var_17_cast_fp16)[name = tensor("op_18_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_18_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor ln_f_weight_to_fp16 = const()[name = tensor("ln_f_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; - tensor x_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = ln_f_weight_to_fp16)[name = tensor("x_cast_fp16")]; - tensor var_23_axes_0 = const()[name = tensor("op_23_axes_0"), val = tensor([2])]; - tensor var_23_cast_fp16 = squeeze(axes = var_23_axes_0, x = x_cast_fp16)[name = tensor("op_23_cast_fp16")]; - tensor var_26_perm_0 = const()[name = tensor("op_26_perm_0"), val = tensor([0, 2, 1])]; + tensor var_9 = const()[name = tensor("op_9"), val = tensor(1)]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_cast_fp16 = concat(axis = var_9, interleave = x_eps_interleave_0, values = (x, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_6, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_34_to_fp16 = const()[name = tensor("op_34_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_34_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor ln_f_weight_to_fp16 = const()[name = tensor("ln_f_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = ln_f_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_48 = const()[name = tensor("op_48"), val = tensor([1, 4096, 1, -1])]; + tensor x_cast_fp16 = reshape(shape = var_48, x = x_5_cast_fp16)[name = tensor("x_cast_fp16")]; + tensor var_51_axes_0 = const()[name = tensor("op_51_axes_0"), val = tensor([2])]; + tensor var_51_cast_fp16 = squeeze(axes = var_51_axes_0, x = x_cast_fp16)[name = tensor("op_51_cast_fp16")]; + tensor var_54_perm_0 = const()[name = tensor("op_54_perm_0"), val = tensor([0, 2, 1])]; tensor concat_4 = const()[name = tensor("concat_4"), val = tensor([64, 4096])]; - tensor transpose_4 = transpose(perm = var_26_perm_0, x = var_23_cast_fp16)[name = tensor("transpose_4")]; - tensor reshape_0_cast_fp16 = reshape(shape = concat_4, x = transpose_4)[name = tensor("reshape_0_cast_fp16")]; + tensor var_54_cast_fp16 = transpose(perm = var_54_perm_0, x = var_51_cast_fp16)[name = tensor("transpose_4")]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_4, x = var_54_cast_fp16)[name = tensor("reshape_0_cast_fp16")]; tensor matmul_0_transpose_x_0 = const()[name = tensor("matmul_0_transpose_x_0"), val = tensor(false)]; tensor matmul_0_transpose_y_0 = const()[name = tensor("matmul_0_transpose_y_0"), val = tensor(false)]; - tensor transpose_1_to_fp16 = const()[name = tensor("transpose_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8320)))]; + tensor transpose_1_to_fp16 = const()[name = tensor("transpose_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8512)))]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_1_to_fp16)[name = tensor("matmul_0_cast_fp16")]; tensor concat_8 = const()[name = tensor("concat_8"), val = tensor([1, 64, 16384])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_8, x = matmul_0_cast_fp16)[name = tensor("reshape_2_cast_fp16")]; tensor matmul_1_transpose_x_0 = const()[name = tensor("matmul_1_transpose_x_0"), val = tensor(false)]; tensor matmul_1_transpose_y_0 = const()[name = tensor("matmul_1_transpose_y_0"), val = tensor(false)]; - tensor transpose_3_to_fp16 = const()[name = tensor("transpose_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134226112)))]; + tensor transpose_3_to_fp16 = const()[name = tensor("transpose_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134226304)))]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_3_to_fp16)[name = tensor("matmul_1_cast_fp16")]; tensor concat_16 = const()[name = tensor("concat_16"), val = tensor([1, 64, 15616])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_16, x = matmul_1_cast_fp16)[name = tensor("reshape_5_cast_fp16")]; - tensor var_41 = const()[name = tensor("op_41"), val = tensor(-1)]; - tensor var_42_interleave_0 = const()[name = tensor("op_42_interleave_0"), val = tensor(false)]; - tensor logits = concat(axis = var_41, interleave = var_42_interleave_0, values = (reshape_2_cast_fp16, reshape_5_cast_fp16))[name = tensor("op_42_cast_fp16")]; + tensor var_69 = const()[name = tensor("op_69"), val = tensor(-1)]; + tensor var_70_interleave_0 = const()[name = tensor("op_70_interleave_0"), val = tensor(false)]; + tensor logits = concat(axis = var_69, interleave = var_70_interleave_0, values = (reshape_2_cast_fp16, reshape_5_cast_fp16))[name = tensor("op_70_cast_fp16")]; } -> (logits); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk13.mlmodelc/weights/weight.bin index b01cf7c867f985353c64ea8955aad628e988541d..573a47e750e8af81cfa112a025973a20fd25238a 100644 --- a/Llama-2-7b-hf_chunk13.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk13.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23cc0c8382a52638c94e9c9963873d35d3222e897233b39b03f4cc92deae2edb -size 262152448 +oid sha256:a041617e42d7a89d6d1e60a90971f24f8fa62634d1c5db56abd302dcf9c3398e +size 262152640 diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin index e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347..b3ad9193762cddd887f95dd17cc4042313420d41 100644 --- a/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e -size 791 +oid sha256:ed0dd15fc572d2cc2ec19b317245622b4256a8737cc9ba114529e925d3bf42f2 +size 793 diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json index c5c8878f7ed07b36bc29057ec3f6367e40453195..a405806a76bad8c31d62860b6f28947de5d19aa0 100644 --- a/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk2", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk2", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/model.mil b/Llama-2-7b-hf_chunk2.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk2.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk2.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk2.mlmodelc/weights/weight.bin index a217cc758bd417b95a3f1548247409bc3ba39c98..fc3ddc761c5a82058a5fe93a4768b08e832cdf55 100644 --- a/Llama-2-7b-hf_chunk2.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk2.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d25138904c91ffd7e03563365ae012b5b126a2b75fc66880152e092e7680e211 -size 303872704 +oid sha256:986f04c4d84fa222e29a878e94d10efdb5cb893883201ece3a0a060f9ab5066e +size 303873856 diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin index e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347..6f8fd64bce0d223b711086f7c1798691439f0bc5 100644 --- a/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e -size 791 +oid sha256:3fdd5ca1ab176b28ed33e53920cb3ef99dac8b0e220af01536a3969d5d83f1a5 +size 793 diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json index 15b9e234e921a5fda79679436c3b40e757f59b50..cc84eb870598d26f738471b6b58f7df7c0960e30 100644 --- a/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk3", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk3", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/model.mil b/Llama-2-7b-hf_chunk3.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk3.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk3.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk3.mlmodelc/weights/weight.bin index dba7058109d5b8403f52bac7ed8290e93629dfea..04ac0c59197409befa5f1c2c821e78d3a79892a0 100644 --- a/Llama-2-7b-hf_chunk3.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk3.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad1bc13ecfabbb4f02f8306bf18913019826fb28b002e14f11bddeca7a9edefa -size 303872704 +oid sha256:0afd4ddd01084d67eec0c37c977b0ac2be78fe40cf7afcbe707e988e4060ffec +size 303873856 diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin index 6a8e1fcd6e9aac86c476bdfef211aba9441a747c..6f8fd64bce0d223b711086f7c1798691439f0bc5 100644 --- a/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b79e263bb20b8a02d650dad2c3eee71ff787829f337aedacb6cd4e1b61c1ce23 -size 791 +oid sha256:3fdd5ca1ab176b28ed33e53920cb3ef99dac8b0e220af01536a3969d5d83f1a5 +size 793 diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json index 4df4a4e982e19c4eb6320f8c0dbc8e1d8389aa3c..7ac1fc8aa41bbb4c5b800e4bbff0336bc3f19101 100644 --- a/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk4", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk4", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/model.mil b/Llama-2-7b-hf_chunk4.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk4.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk4.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk4.mlmodelc/weights/weight.bin index e21eacda88a1266963fc0519fdb65ae62436f1ce..60b7deba61f1f5501a68d21f192d86921373b244 100644 --- a/Llama-2-7b-hf_chunk4.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk4.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2b1969a0b2372ca72340108bf7967f643d02a423cac947a5bd3608fdde48b86 -size 303872704 +oid sha256:d368880511e37c9082ff760946ff94910ec04d00ebd70f03242588bceb67a685 +size 303873856 diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin index e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347..b3ad9193762cddd887f95dd17cc4042313420d41 100644 --- a/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e -size 791 +oid sha256:ed0dd15fc572d2cc2ec19b317245622b4256a8737cc9ba114529e925d3bf42f2 +size 793 diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json index 82b7676890485cec49c89b0bfcbf270d57ced7bf..dc44f7319ef2d48e2d31e63204ee573f7730e37a 100644 --- a/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk5", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk5", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/model.mil b/Llama-2-7b-hf_chunk5.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk5.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk5.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk5.mlmodelc/weights/weight.bin index c71f381de60901f6e03693f18cec15ec5d645b9e..912b2dbf14dcbb1dddc9f31568661424ce49755a 100644 --- a/Llama-2-7b-hf_chunk5.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk5.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d931534284a44e5004b85274be8d122ee55af90a599ea689a9491c6ce13fa16 -size 303872704 +oid sha256:d28027700ce1e1dc02bc839d431de13ae2df66b53637a7b559799adce3b84afc +size 303873856 diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin index e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347..b3ad9193762cddd887f95dd17cc4042313420d41 100644 --- a/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e -size 791 +oid sha256:ed0dd15fc572d2cc2ec19b317245622b4256a8737cc9ba114529e925d3bf42f2 +size 793 diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json index bbc232f400d84f860b0bbc9d074f8a021d011992..eb9dff192b9c60720ea057cb49b7a205be8ac27f 100644 --- a/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk6", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk6", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/model.mil b/Llama-2-7b-hf_chunk6.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk6.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk6.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk6.mlmodelc/weights/weight.bin index 4d9b250e04d53ce35dcb3567344f6896b14e8a2a..400174a3aa315b42853413431a189f18fdde067d 100644 --- a/Llama-2-7b-hf_chunk6.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk6.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:646d17c5d6d62e055abb88615254cb2d8205cd46a7b98faa734136f30c8ca26a -size 303872704 +oid sha256:52ba7c726d6d195f52b6a623161d59bb42debc6a3e217eaf3f34bcd9be1b3834 +size 303873856 diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin index e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347..6f8fd64bce0d223b711086f7c1798691439f0bc5 100644 --- a/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e -size 791 +oid sha256:3fdd5ca1ab176b28ed33e53920cb3ef99dac8b0e220af01536a3969d5d83f1a5 +size 793 diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json index eb0d6148a6e32ea64a3a9b331c6987331f31a9ce..d146c0a68f8ae66cc78fcc6fa986c2356d91cda5 100644 --- a/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk7", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk7", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/model.mil b/Llama-2-7b-hf_chunk7.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk7.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk7.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk7.mlmodelc/weights/weight.bin index ec66ac965de015149f69a8beb6ecaacf950c8e57..6d9ed93e13c917b8af824d29fc2c9fde8f0b6ee2 100644 --- a/Llama-2-7b-hf_chunk7.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk7.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88a1c4e9e3f25b1ec4e4b57b7effd4816ff0645b75014cb32186e4848748a7be -size 303872704 +oid sha256:743c81d6e5388b01af35ac832a00e150c9283f580e4f092780414af355038fe2 +size 303873856 diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin index 6a8e1fcd6e9aac86c476bdfef211aba9441a747c..6f8fd64bce0d223b711086f7c1798691439f0bc5 100644 --- a/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b79e263bb20b8a02d650dad2c3eee71ff787829f337aedacb6cd4e1b61c1ce23 -size 791 +oid sha256:3fdd5ca1ab176b28ed33e53920cb3ef99dac8b0e220af01536a3969d5d83f1a5 +size 793 diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json index b8a06125e6fd1b3852d5ec0c5ad26c3eda1b22e1..1f980b47640ae8d1b930bc4888e2c48b44d5b3f3 100644 --- a/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk8", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk8", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/model.mil b/Llama-2-7b-hf_chunk8.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk8.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk8.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk8.mlmodelc/weights/weight.bin index 6a5ab100b7b2a167e2520f92e51d3193a316a8a4..2d33b4c789dcb4491a4aeb9061d99e6e8e8bc7f0 100644 --- a/Llama-2-7b-hf_chunk8.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk8.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0072b3a69cac604bd35ef4f9b83c10dfcb19a5eea0a8dac1e2402f4981ea530a -size 303872704 +oid sha256:1dcb79d127d1f4760791e82836f6b7efee5c5d219075e2b8a8cd1a626f16fec8 +size 303873856 diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin index e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c..4fe83fe71107a43dada0318cb8055e6cdccff704 100644 --- a/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin +++ b/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +oid sha256:997c2b09d10cc368b341f867b52aac1e9325853550f47133cc48a353128e881a size 243 diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin index e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347..6f8fd64bce0d223b711086f7c1798691439f0bc5 100644 --- a/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin +++ b/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e -size 791 +oid sha256:3fdd5ca1ab176b28ed33e53920cb3ef99dac8b0e220af01536a3969d5d83f1a5 +size 793 diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json index dbc7d27946c8739ff15e79e1d1de5f40df227a2e..bfd71f0fafb947a9c1b8557877d4e1adfcde85fb 100644 --- a/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json +++ b/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json @@ -7,9 +7,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "new_x", "type" : "MultiArray" }, @@ -17,9 +17,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_0", "type" : "MultiArray" }, @@ -27,9 +27,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_1", "type" : "MultiArray" }, @@ -37,9 +37,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache_2", "type" : "MultiArray" }, @@ -47,9 +47,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_0", "type" : "MultiArray" }, @@ -57,9 +57,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_1", "type" : "MultiArray" }, @@ -67,9 +67,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache_2", "type" : "MultiArray" } @@ -79,17 +79,18 @@ ], "specificationVersion" : 7, "mlProgramOperationTypeHistogram" : { - "Concat" : 18, - "Ios16.rsqrt" : 6, - "Ios16.mul" : 63, - "SliceByIndex" : 12, + "Concat" : 21, + "Ios16.mul" : 150, + "SliceByIndex" : 300, "Ios16.constexprLutToDense" : 21, + "Transpose" : 3, + "Ios16.einsum" : 192, "Ios16.conv" : 21, - "Ios16.add" : 21, - "Ios16.reduceMean" : 6, - "Ios16.matmul" : 6, - "Ios16.softmax" : 3, - "Ios16.reshape" : 12, + "Ios16.add" : 108, + "Ios16.realDiv" : 6, + "Ios16.softmax" : 96, + "Ios16.reduceL2Norm" : 6, + "Ios16.reshape" : 21, "Ios16.silu" : 3 }, "computePrecision" : "Mixed (Float16, Int32)", @@ -108,16 +109,16 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 8 × 8)", "shortDescription" : "", - "shape" : "[1, 4096, 1, 64]", + "shape" : "[1, 4096, 8, 8]", "name" : "x", "type" : "MultiArray" }, @@ -145,9 +146,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 1, 64, 512]", + "shape" : "[1, 512, 1, 64]", "name" : "mask", "type" : "MultiArray" }, @@ -155,9 +156,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_0", "type" : "MultiArray" }, @@ -165,9 +166,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_0", "type" : "MultiArray" }, @@ -175,9 +176,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_1", "type" : "MultiArray" }, @@ -185,9 +186,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_1", "type" : "MultiArray" }, @@ -195,9 +196,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "k_cache_2", "type" : "MultiArray" }, @@ -205,14 +206,14 @@ "hasShapeFlexibility" : "0", "isOptional" : "1", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)?", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "v_cache_2", "type" : "MultiArray" } ], - "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk9", + "generatedClassName" : "Llama_2_7b_hf_2024_08_09_09_54_41_chunk9", "method" : "predict" } ] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/model.mil b/Llama-2-7b-hf_chunk9.mlmodelc/model.mil index d5387d44d58aa12214b26cdaf15fcd539841a734..4542bbd13c6999eab52cf6d57c56a10fb6cfc308 100644 --- a/Llama-2-7b-hf_chunk9.mlmodelc/model.mil +++ b/Llama-2-7b-hf_chunk9.mlmodelc/model.mil @@ -1,7 +1,7 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; @@ -23,407 +23,2315 @@ program(1.0) tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; - tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; - tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; - tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; - tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; - tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; - tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; - tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; - tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; - tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; - tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; - tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; - tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; - tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; - tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; - tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; - tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; - tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; - tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; - tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; - tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; - tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; - tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; - tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; - tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; - tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; - tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; - tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; - tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; - tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; - tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; - tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; - tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; - tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; - tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-1)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-2)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-3)]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor(1)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_64, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_67, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_91_to_fp16 = const()[name = tensor("op_91_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_91_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303567040)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_113, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121_pad_type_0 = const()[name = tensor("op_121_pad_type_0"), val = tensor("custom")]; + tensor var_121_pad_0 = const()[name = tensor("op_121_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_cast_fp16 = conv(dilations = var_119, groups = var_64, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_117, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575296)))]; + tensor q_1_cast_fp16 = mul(x = var_121_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 1])]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1])]; + tensor var_129_pad_type_0 = const()[name = tensor("op_129_pad_type_0"), val = tensor("custom")]; + tensor var_129_pad_0 = const()[name = tensor("op_129_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_129_cast_fp16 = conv(dilations = var_127, groups = var_64, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_125, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583552)))]; + tensor k_1_cast_fp16 = mul(x = var_129_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 1])]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1])]; + tensor var_137_pad_type_0 = const()[name = tensor("op_137_pad_type_0"), val = tensor("custom")]; + tensor var_137_pad_0 = const()[name = tensor("op_137_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_137_cast_fp16 = conv(dilations = var_135, groups = var_64, pad = var_137_pad_0, pad_type = var_137_pad_type_0, strides = var_133, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591808)))]; + tensor v_1_cast_fp16 = mul(x = var_137_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_139, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_141, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_155_begin_0 = const()[name = tensor("op_155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_end_0 = const()[name = tensor("op_155_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_155_end_mask_0 = const()[name = tensor("op_155_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = var_155_end_0, end_mask = var_155_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_163_cast_fp16 = mul(x = var_161_cast_fp16, y = const_11_promoted_to_fp16)[name = tensor("op_163_cast_fp16")]; tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; - tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; - tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; - tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; - tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; - tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; - tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; - tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_1_cast_fp16 = concat(axis = var_21, interleave = rotated_1_interleave_0, values = (var_163_cast_fp16, var_155_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_166_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_166_cast_fp16")]; + tensor var_167_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_167_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_166_cast_fp16, y = var_167_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_180_begin_0 = const()[name = tensor("op_180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_end_0 = const()[name = tensor("op_180_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_180_end_mask_0 = const()[name = tensor("op_180_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_180_cast_fp16 = slice_by_index(begin = var_180_begin_0, end = var_180_end_0, end_mask = var_180_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_begin_0 = const()[name = tensor("op_186_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_186_end_0 = const()[name = tensor("op_186_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_186_end_mask_0 = const()[name = tensor("op_186_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_186_cast_fp16 = slice_by_index(begin = var_186_begin_0, end = var_186_end_0, end_mask = var_186_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor const_13_promoted_to_fp16 = const()[name = tensor("const_13_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_188_cast_fp16 = mul(x = var_186_cast_fp16, y = const_13_promoted_to_fp16)[name = tensor("op_188_cast_fp16")]; tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; - tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; - tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; - tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; - tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; - tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; - tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; - tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; - tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; - tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; - tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; - tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; - tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; - tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; - tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; - tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; - tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; - tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; - tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; - tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; - tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; - tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; - tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; - tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; - tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; - tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; - tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; - tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; - tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; - tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; - tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; - tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; - tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; - tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; - tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; - tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; - tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; - tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; - tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; - tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; - tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; - tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; - tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; - tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; - tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; - tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; - tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; - tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; - tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; - tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; - tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; - tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; - tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; - tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; - tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; - tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; - tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; - tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; - tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; - tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; - tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; - tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; - tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; - tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; - tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; - tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; - tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; - tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; - tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; - tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; - tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; - tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; - tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; - tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; - tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; - tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; - tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; - tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; - tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; - tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; - tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; - tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; - tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; - tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; - tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; - tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_3_cast_fp16 = concat(axis = var_21, interleave = rotated_3_interleave_0, values = (var_188_cast_fp16, var_180_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_191_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_191_cast_fp16")]; + tensor var_192_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_192_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_191_cast_fp16, y = var_192_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 4096, 1, 64])]; + tensor var_196_cast_fp16 = reshape(shape = var_195, x = roped_3_cast_fp16)[name = tensor("op_196_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_198, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_interleave_0 = const()[name = tensor("k_9_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_7_perm_0, x = var_196_cast_fp16)[name = tensor("transpose_2")]; + tensor k_9_cast_fp16 = concat(axis = var_23, interleave = k_9_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_9_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_17, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, 4096, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_205, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_218_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_246_cast_fp16")]; + tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_340_cast_fp16")]; + tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_344_cast_fp16")]; + tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_356_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_364_cast_fp16")]; + tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_368_cast_fp16")]; + tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_372_cast_fp16")]; + tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_384_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_396_cast_fp16")]; + tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_400_cast_fp16")]; + tensor var_404_begin_0 = const()[name = tensor("op_404_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_404_end_0 = const()[name = tensor("op_404_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_404_end_mask_0 = const()[name = tensor("op_404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_404_cast_fp16 = slice_by_index(begin = var_404_begin_0, end = var_404_end_0, end_mask = var_404_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_404_cast_fp16")]; + tensor var_408_begin_0 = const()[name = tensor("op_408_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_408_end_0 = const()[name = tensor("op_408_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_408_end_mask_0 = const()[name = tensor("op_408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_408_cast_fp16 = slice_by_index(begin = var_408_begin_0, end = var_408_end_0, end_mask = var_408_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_408_cast_fp16")]; + tensor var_412_begin_0 = const()[name = tensor("op_412_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_412_end_0 = const()[name = tensor("op_412_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_412_end_mask_0 = const()[name = tensor("op_412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_412_cast_fp16 = slice_by_index(begin = var_412_begin_0, end = var_412_end_0, end_mask = var_412_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_412_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_420_begin_0 = const()[name = tensor("op_420_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_420_end_0 = const()[name = tensor("op_420_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_420_end_mask_0 = const()[name = tensor("op_420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_420_cast_fp16 = slice_by_index(begin = var_420_begin_0, end = var_420_end_0, end_mask = var_420_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_420_cast_fp16")]; + tensor var_424_begin_0 = const()[name = tensor("op_424_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_424_end_0 = const()[name = tensor("op_424_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_424_end_mask_0 = const()[name = tensor("op_424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_424_cast_fp16")]; + tensor var_428_begin_0 = const()[name = tensor("op_428_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_428_end_0 = const()[name = tensor("op_428_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_428_end_mask_0 = const()[name = tensor("op_428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_428_cast_fp16 = slice_by_index(begin = var_428_begin_0, end = var_428_end_0, end_mask = var_428_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor var_432_begin_0 = const()[name = tensor("op_432_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_432_end_0 = const()[name = tensor("op_432_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_432_end_mask_0 = const()[name = tensor("op_432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_432_cast_fp16 = slice_by_index(begin = var_432_begin_0, end = var_432_end_0, end_mask = var_432_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor var_436_begin_0 = const()[name = tensor("op_436_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_436_end_0 = const()[name = tensor("op_436_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_436_end_mask_0 = const()[name = tensor("op_436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_436_cast_fp16 = slice_by_index(begin = var_436_begin_0, end = var_436_end_0, end_mask = var_436_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor var_440_begin_0 = const()[name = tensor("op_440_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_440_end_0 = const()[name = tensor("op_440_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_440_end_mask_0 = const()[name = tensor("op_440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_440_cast_fp16 = slice_by_index(begin = var_440_begin_0, end = var_440_end_0, end_mask = var_440_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_448_begin_0 = const()[name = tensor("op_448_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_448_end_0 = const()[name = tensor("op_448_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_448_end_mask_0 = const()[name = tensor("op_448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_448_cast_fp16 = slice_by_index(begin = var_448_begin_0, end = var_448_end_0, end_mask = var_448_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_452_begin_0 = const()[name = tensor("op_452_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_452_end_0 = const()[name = tensor("op_452_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_452_end_mask_0 = const()[name = tensor("op_452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_452_cast_fp16 = slice_by_index(begin = var_452_begin_0, end = var_452_end_0, end_mask = var_452_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_452_cast_fp16")]; + tensor var_456_begin_0 = const()[name = tensor("op_456_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_456_end_0 = const()[name = tensor("op_456_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_456_end_mask_0 = const()[name = tensor("op_456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_456_cast_fp16 = slice_by_index(begin = var_456_begin_0, end = var_456_end_0, end_mask = var_456_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_456_cast_fp16")]; + tensor var_460_begin_0 = const()[name = tensor("op_460_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_460_end_0 = const()[name = tensor("op_460_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_460_end_mask_0 = const()[name = tensor("op_460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_460_cast_fp16 = slice_by_index(begin = var_460_begin_0, end = var_460_end_0, end_mask = var_460_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_460_cast_fp16")]; + tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor var_466_begin_0 = const()[name = tensor("op_466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_466_end_0 = const()[name = tensor("op_466_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_466_end_mask_0 = const()[name = tensor("op_466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_474_begin_0 = const()[name = tensor("op_474_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_474_end_0 = const()[name = tensor("op_474_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_474_end_mask_0 = const()[name = tensor("op_474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = var_474_end_0, end_mask = var_474_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_482_begin_0 = const()[name = tensor("op_482_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_482_end_0 = const()[name = tensor("op_482_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_482_end_mask_0 = const()[name = tensor("op_482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_482_cast_fp16 = slice_by_index(begin = var_482_begin_0, end = var_482_end_0, end_mask = var_482_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_490_begin_0 = const()[name = tensor("op_490_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_490_end_0 = const()[name = tensor("op_490_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_490_end_mask_0 = const()[name = tensor("op_490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_490_cast_fp16 = slice_by_index(begin = var_490_begin_0, end = var_490_end_0, end_mask = var_490_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_490_cast_fp16")]; + tensor var_494_begin_0 = const()[name = tensor("op_494_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_494_end_0 = const()[name = tensor("op_494_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_494_end_mask_0 = const()[name = tensor("op_494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor var_498_begin_0 = const()[name = tensor("op_498_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_498_end_0 = const()[name = tensor("op_498_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_498_end_mask_0 = const()[name = tensor("op_498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_498_cast_fp16 = slice_by_index(begin = var_498_begin_0, end = var_498_end_0, end_mask = var_498_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_498_cast_fp16")]; + tensor var_502_begin_0 = const()[name = tensor("op_502_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_502_end_0 = const()[name = tensor("op_502_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_502_end_mask_0 = const()[name = tensor("op_502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_502_cast_fp16 = slice_by_index(begin = var_502_begin_0, end = var_502_end_0, end_mask = var_502_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_510_begin_0 = const()[name = tensor("op_510_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_510_end_0 = const()[name = tensor("op_510_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_510_end_mask_0 = const()[name = tensor("op_510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_510_cast_fp16 = slice_by_index(begin = var_510_begin_0, end = var_510_end_0, end_mask = var_510_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_518_begin_0 = const()[name = tensor("op_518_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_518_end_0 = const()[name = tensor("op_518_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_518_end_mask_0 = const()[name = tensor("op_518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_518_cast_fp16 = slice_by_index(begin = var_518_begin_0, end = var_518_end_0, end_mask = var_518_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor var_522_begin_0 = const()[name = tensor("op_522_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_522_end_0 = const()[name = tensor("op_522_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_522_end_mask_0 = const()[name = tensor("op_522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_522_cast_fp16 = slice_by_index(begin = var_522_begin_0, end = var_522_end_0, end_mask = var_522_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_522_cast_fp16")]; + tensor var_526_begin_0 = const()[name = tensor("op_526_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_526_end_0 = const()[name = tensor("op_526_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_526_end_mask_0 = const()[name = tensor("op_526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_530_begin_0 = const()[name = tensor("op_530_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_530_end_0 = const()[name = tensor("op_530_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_530_end_mask_0 = const()[name = tensor("op_530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_530_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor var_538_begin_0 = const()[name = tensor("op_538_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_538_end_0 = const()[name = tensor("op_538_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_538_end_mask_0 = const()[name = tensor("op_538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_546_begin_0 = const()[name = tensor("op_546_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_546_end_0 = const()[name = tensor("op_546_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_546_end_mask_0 = const()[name = tensor("op_546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_546_cast_fp16")]; + tensor var_550_begin_0 = const()[name = tensor("op_550_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_550_end_0 = const()[name = tensor("op_550_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_550_end_mask_0 = const()[name = tensor("op_550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_550_cast_fp16 = slice_by_index(begin = var_550_begin_0, end = var_550_end_0, end_mask = var_550_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_550_cast_fp16")]; + tensor var_554_begin_0 = const()[name = tensor("op_554_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_554_end_0 = const()[name = tensor("op_554_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_554_end_mask_0 = const()[name = tensor("op_554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_554_cast_fp16")]; + tensor var_558_begin_0 = const()[name = tensor("op_558_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_558_end_0 = const()[name = tensor("op_558_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_558_end_mask_0 = const()[name = tensor("op_558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_558_cast_fp16 = slice_by_index(begin = var_558_begin_0, end = var_558_end_0, end_mask = var_558_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_562_cast_fp16")]; + tensor var_566_begin_0 = const()[name = tensor("op_566_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_566_end_0 = const()[name = tensor("op_566_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_566_end_mask_0 = const()[name = tensor("op_566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_574_begin_0 = const()[name = tensor("op_574_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_574_end_0 = const()[name = tensor("op_574_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_574_end_mask_0 = const()[name = tensor("op_574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = var_574_end_0, end_mask = var_574_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_578_begin_0 = const()[name = tensor("op_578_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_578_end_0 = const()[name = tensor("op_578_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_578_end_mask_0 = const()[name = tensor("op_578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = var_578_end_0, end_mask = var_578_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_578_cast_fp16")]; + tensor var_582_begin_0 = const()[name = tensor("op_582_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_582_end_0 = const()[name = tensor("op_582_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_582_end_mask_0 = const()[name = tensor("op_582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_582_cast_fp16 = slice_by_index(begin = var_582_begin_0, end = var_582_end_0, end_mask = var_582_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_582_cast_fp16")]; + tensor var_586_begin_0 = const()[name = tensor("op_586_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_586_end_0 = const()[name = tensor("op_586_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_586_end_mask_0 = const()[name = tensor("op_586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_586_cast_fp16")]; + tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor var_594_equation_0 = const()[name = tensor("op_594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_340_cast_fp16, var_210_cast_fp16))[name = tensor("op_594_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_596_cast_fp16 = mul(x = var_594_cast_fp16, y = var_595_to_fp16)[name = tensor("op_596_cast_fp16")]; + tensor var_598_equation_0 = const()[name = tensor("op_598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_344_cast_fp16, var_214_cast_fp16))[name = tensor("op_598_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_600_cast_fp16 = mul(x = var_598_cast_fp16, y = var_599_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_602_equation_0 = const()[name = tensor("op_602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_348_cast_fp16, var_218_cast_fp16))[name = tensor("op_602_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_604_cast_fp16 = mul(x = var_602_cast_fp16, y = var_603_to_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_606_equation_0 = const()[name = tensor("op_606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_352_cast_fp16, var_222_cast_fp16))[name = tensor("op_606_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_608_cast_fp16 = mul(x = var_606_cast_fp16, y = var_607_to_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_610_equation_0 = const()[name = tensor("op_610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_610_cast_fp16 = einsum(equation = var_610_equation_0, values = (var_356_cast_fp16, var_226_cast_fp16))[name = tensor("op_610_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_612_cast_fp16 = mul(x = var_610_cast_fp16, y = var_611_to_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_614_equation_0 = const()[name = tensor("op_614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_614_cast_fp16 = einsum(equation = var_614_equation_0, values = (var_360_cast_fp16, var_230_cast_fp16))[name = tensor("op_614_cast_fp16")]; + tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_616_cast_fp16 = mul(x = var_614_cast_fp16, y = var_615_to_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_618_equation_0 = const()[name = tensor("op_618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_618_cast_fp16 = einsum(equation = var_618_equation_0, values = (var_364_cast_fp16, var_234_cast_fp16))[name = tensor("op_618_cast_fp16")]; + tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_620_cast_fp16 = mul(x = var_618_cast_fp16, y = var_619_to_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_622_equation_0 = const()[name = tensor("op_622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_622_cast_fp16 = einsum(equation = var_622_equation_0, values = (var_368_cast_fp16, var_238_cast_fp16))[name = tensor("op_622_cast_fp16")]; + tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_624_cast_fp16 = mul(x = var_622_cast_fp16, y = var_623_to_fp16)[name = tensor("op_624_cast_fp16")]; + tensor var_626_equation_0 = const()[name = tensor("op_626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_626_cast_fp16 = einsum(equation = var_626_equation_0, values = (var_372_cast_fp16, var_242_cast_fp16))[name = tensor("op_626_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_628_cast_fp16 = mul(x = var_626_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_630_equation_0 = const()[name = tensor("op_630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_630_cast_fp16 = einsum(equation = var_630_equation_0, values = (var_376_cast_fp16, var_246_cast_fp16))[name = tensor("op_630_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_632_cast_fp16 = mul(x = var_630_cast_fp16, y = var_631_to_fp16)[name = tensor("op_632_cast_fp16")]; + tensor var_634_equation_0 = const()[name = tensor("op_634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_380_cast_fp16, var_250_cast_fp16))[name = tensor("op_634_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_636_cast_fp16 = mul(x = var_634_cast_fp16, y = var_635_to_fp16)[name = tensor("op_636_cast_fp16")]; + tensor var_638_equation_0 = const()[name = tensor("op_638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_384_cast_fp16, var_254_cast_fp16))[name = tensor("op_638_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_640_cast_fp16 = mul(x = var_638_cast_fp16, y = var_639_to_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_642_equation_0 = const()[name = tensor("op_642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_388_cast_fp16, var_258_cast_fp16))[name = tensor("op_642_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_644_cast_fp16 = mul(x = var_642_cast_fp16, y = var_643_to_fp16)[name = tensor("op_644_cast_fp16")]; + tensor var_646_equation_0 = const()[name = tensor("op_646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_646_cast_fp16 = einsum(equation = var_646_equation_0, values = (var_392_cast_fp16, var_262_cast_fp16))[name = tensor("op_646_cast_fp16")]; + tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_648_cast_fp16 = mul(x = var_646_cast_fp16, y = var_647_to_fp16)[name = tensor("op_648_cast_fp16")]; + tensor var_650_equation_0 = const()[name = tensor("op_650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_650_cast_fp16 = einsum(equation = var_650_equation_0, values = (var_396_cast_fp16, var_266_cast_fp16))[name = tensor("op_650_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_652_cast_fp16 = mul(x = var_650_cast_fp16, y = var_651_to_fp16)[name = tensor("op_652_cast_fp16")]; + tensor var_654_equation_0 = const()[name = tensor("op_654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_654_cast_fp16 = einsum(equation = var_654_equation_0, values = (var_400_cast_fp16, var_270_cast_fp16))[name = tensor("op_654_cast_fp16")]; + tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_656_cast_fp16 = mul(x = var_654_cast_fp16, y = var_655_to_fp16)[name = tensor("op_656_cast_fp16")]; + tensor var_658_equation_0 = const()[name = tensor("op_658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_658_cast_fp16 = einsum(equation = var_658_equation_0, values = (var_404_cast_fp16, var_274_cast_fp16))[name = tensor("op_658_cast_fp16")]; + tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_660_cast_fp16 = mul(x = var_658_cast_fp16, y = var_659_to_fp16)[name = tensor("op_660_cast_fp16")]; + tensor var_662_equation_0 = const()[name = tensor("op_662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_662_cast_fp16 = einsum(equation = var_662_equation_0, values = (var_408_cast_fp16, var_278_cast_fp16))[name = tensor("op_662_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_664_cast_fp16 = mul(x = var_662_cast_fp16, y = var_663_to_fp16)[name = tensor("op_664_cast_fp16")]; + tensor var_666_equation_0 = const()[name = tensor("op_666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_666_cast_fp16 = einsum(equation = var_666_equation_0, values = (var_412_cast_fp16, var_282_cast_fp16))[name = tensor("op_666_cast_fp16")]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_668_cast_fp16 = mul(x = var_666_cast_fp16, y = var_667_to_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_670_equation_0 = const()[name = tensor("op_670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_670_cast_fp16 = einsum(equation = var_670_equation_0, values = (var_416_cast_fp16, var_286_cast_fp16))[name = tensor("op_670_cast_fp16")]; + tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_672_cast_fp16 = mul(x = var_670_cast_fp16, y = var_671_to_fp16)[name = tensor("op_672_cast_fp16")]; + tensor var_674_equation_0 = const()[name = tensor("op_674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_674_cast_fp16 = einsum(equation = var_674_equation_0, values = (var_420_cast_fp16, var_290_cast_fp16))[name = tensor("op_674_cast_fp16")]; + tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_676_cast_fp16 = mul(x = var_674_cast_fp16, y = var_675_to_fp16)[name = tensor("op_676_cast_fp16")]; + tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_678_cast_fp16 = einsum(equation = var_678_equation_0, values = (var_424_cast_fp16, var_294_cast_fp16))[name = tensor("op_678_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_680_cast_fp16 = mul(x = var_678_cast_fp16, y = var_679_to_fp16)[name = tensor("op_680_cast_fp16")]; + tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_682_cast_fp16 = einsum(equation = var_682_equation_0, values = (var_428_cast_fp16, var_298_cast_fp16))[name = tensor("op_682_cast_fp16")]; + tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_684_cast_fp16 = mul(x = var_682_cast_fp16, y = var_683_to_fp16)[name = tensor("op_684_cast_fp16")]; + tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_686_cast_fp16 = einsum(equation = var_686_equation_0, values = (var_432_cast_fp16, var_302_cast_fp16))[name = tensor("op_686_cast_fp16")]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_688_cast_fp16 = mul(x = var_686_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; + tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_690_cast_fp16 = einsum(equation = var_690_equation_0, values = (var_436_cast_fp16, var_306_cast_fp16))[name = tensor("op_690_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_692_cast_fp16 = mul(x = var_690_cast_fp16, y = var_691_to_fp16)[name = tensor("op_692_cast_fp16")]; + tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_694_cast_fp16 = einsum(equation = var_694_equation_0, values = (var_440_cast_fp16, var_310_cast_fp16))[name = tensor("op_694_cast_fp16")]; + tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_696_cast_fp16 = mul(x = var_694_cast_fp16, y = var_695_to_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_444_cast_fp16, var_314_cast_fp16))[name = tensor("op_698_cast_fp16")]; + tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_700_cast_fp16 = mul(x = var_698_cast_fp16, y = var_699_to_fp16)[name = tensor("op_700_cast_fp16")]; + tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_448_cast_fp16, var_318_cast_fp16))[name = tensor("op_702_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_704_cast_fp16 = mul(x = var_702_cast_fp16, y = var_703_to_fp16)[name = tensor("op_704_cast_fp16")]; + tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_452_cast_fp16, var_322_cast_fp16))[name = tensor("op_706_cast_fp16")]; + tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_708_cast_fp16 = mul(x = var_706_cast_fp16, y = var_707_to_fp16)[name = tensor("op_708_cast_fp16")]; + tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_456_cast_fp16, var_326_cast_fp16))[name = tensor("op_710_cast_fp16")]; + tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_712_cast_fp16 = mul(x = var_710_cast_fp16, y = var_711_to_fp16)[name = tensor("op_712_cast_fp16")]; + tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_714_cast_fp16 = einsum(equation = var_714_equation_0, values = (var_460_cast_fp16, var_330_cast_fp16))[name = tensor("op_714_cast_fp16")]; + tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_716_cast_fp16 = mul(x = var_714_cast_fp16, y = var_715_to_fp16)[name = tensor("op_716_cast_fp16")]; + tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_718_cast_fp16 = einsum(equation = var_718_equation_0, values = (var_464_cast_fp16, var_334_cast_fp16))[name = tensor("op_718_cast_fp16")]; + tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_720_cast_fp16 = mul(x = var_718_cast_fp16, y = var_719_to_fp16)[name = tensor("op_720_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_596_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_600_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_604_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_608_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_612_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_616_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_620_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_624_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_628_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_632_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_636_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_640_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_644_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_648_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_652_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_656_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_660_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_664_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_668_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_672_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_676_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_680_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_684_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_688_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_692_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_696_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_700_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_704_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_708_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_712_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_716_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_720_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor var_753_cast_fp16 = softmax(axis = var_64, x = aw_1_cast_fp16)[name = tensor("op_753_cast_fp16")]; + tensor var_754_cast_fp16 = softmax(axis = var_64, x = aw_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; + tensor var_755_cast_fp16 = softmax(axis = var_64, x = aw_5_cast_fp16)[name = tensor("op_755_cast_fp16")]; + tensor var_756_cast_fp16 = softmax(axis = var_64, x = aw_7_cast_fp16)[name = tensor("op_756_cast_fp16")]; + tensor var_757_cast_fp16 = softmax(axis = var_64, x = aw_9_cast_fp16)[name = tensor("op_757_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_64, x = aw_11_cast_fp16)[name = tensor("op_758_cast_fp16")]; + tensor var_759_cast_fp16 = softmax(axis = var_64, x = aw_13_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_760_cast_fp16 = softmax(axis = var_64, x = aw_15_cast_fp16)[name = tensor("op_760_cast_fp16")]; + tensor var_761_cast_fp16 = softmax(axis = var_64, x = aw_17_cast_fp16)[name = tensor("op_761_cast_fp16")]; + tensor var_762_cast_fp16 = softmax(axis = var_64, x = aw_19_cast_fp16)[name = tensor("op_762_cast_fp16")]; + tensor var_763_cast_fp16 = softmax(axis = var_64, x = aw_21_cast_fp16)[name = tensor("op_763_cast_fp16")]; + tensor var_764_cast_fp16 = softmax(axis = var_64, x = aw_23_cast_fp16)[name = tensor("op_764_cast_fp16")]; + tensor var_765_cast_fp16 = softmax(axis = var_64, x = aw_25_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor var_766_cast_fp16 = softmax(axis = var_64, x = aw_27_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_767_cast_fp16 = softmax(axis = var_64, x = aw_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_768_cast_fp16 = softmax(axis = var_64, x = aw_31_cast_fp16)[name = tensor("op_768_cast_fp16")]; + tensor var_769_cast_fp16 = softmax(axis = var_64, x = aw_33_cast_fp16)[name = tensor("op_769_cast_fp16")]; + tensor var_770_cast_fp16 = softmax(axis = var_64, x = aw_35_cast_fp16)[name = tensor("op_770_cast_fp16")]; + tensor var_771_cast_fp16 = softmax(axis = var_64, x = aw_37_cast_fp16)[name = tensor("op_771_cast_fp16")]; + tensor var_772_cast_fp16 = softmax(axis = var_64, x = aw_39_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor var_773_cast_fp16 = softmax(axis = var_64, x = aw_41_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_774_cast_fp16 = softmax(axis = var_64, x = aw_43_cast_fp16)[name = tensor("op_774_cast_fp16")]; + tensor var_775_cast_fp16 = softmax(axis = var_64, x = aw_45_cast_fp16)[name = tensor("op_775_cast_fp16")]; + tensor var_776_cast_fp16 = softmax(axis = var_64, x = aw_47_cast_fp16)[name = tensor("op_776_cast_fp16")]; + tensor var_777_cast_fp16 = softmax(axis = var_64, x = aw_49_cast_fp16)[name = tensor("op_777_cast_fp16")]; + tensor var_778_cast_fp16 = softmax(axis = var_64, x = aw_51_cast_fp16)[name = tensor("op_778_cast_fp16")]; + tensor var_779_cast_fp16 = softmax(axis = var_64, x = aw_53_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780_cast_fp16 = softmax(axis = var_64, x = aw_55_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_781_cast_fp16 = softmax(axis = var_64, x = aw_57_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor var_782_cast_fp16 = softmax(axis = var_64, x = aw_59_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_783_cast_fp16 = softmax(axis = var_64, x = aw_61_cast_fp16)[name = tensor("op_783_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_64, x = aw_63_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_466_cast_fp16, var_753_cast_fp16))[name = tensor("op_786_cast_fp16")]; + tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_470_cast_fp16, var_754_cast_fp16))[name = tensor("op_788_cast_fp16")]; + tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_474_cast_fp16, var_755_cast_fp16))[name = tensor("op_790_cast_fp16")]; + tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_478_cast_fp16, var_756_cast_fp16))[name = tensor("op_792_cast_fp16")]; + tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_482_cast_fp16, var_757_cast_fp16))[name = tensor("op_794_cast_fp16")]; + tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_486_cast_fp16, var_758_cast_fp16))[name = tensor("op_796_cast_fp16")]; + tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_490_cast_fp16, var_759_cast_fp16))[name = tensor("op_798_cast_fp16")]; + tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_494_cast_fp16, var_760_cast_fp16))[name = tensor("op_800_cast_fp16")]; + tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_498_cast_fp16, var_761_cast_fp16))[name = tensor("op_802_cast_fp16")]; + tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_502_cast_fp16, var_762_cast_fp16))[name = tensor("op_804_cast_fp16")]; + tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_506_cast_fp16, var_763_cast_fp16))[name = tensor("op_806_cast_fp16")]; + tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_510_cast_fp16, var_764_cast_fp16))[name = tensor("op_808_cast_fp16")]; + tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_514_cast_fp16, var_765_cast_fp16))[name = tensor("op_810_cast_fp16")]; + tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_518_cast_fp16, var_766_cast_fp16))[name = tensor("op_812_cast_fp16")]; + tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_522_cast_fp16, var_767_cast_fp16))[name = tensor("op_814_cast_fp16")]; + tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_526_cast_fp16, var_768_cast_fp16))[name = tensor("op_816_cast_fp16")]; + tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_530_cast_fp16, var_769_cast_fp16))[name = tensor("op_818_cast_fp16")]; + tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_534_cast_fp16, var_770_cast_fp16))[name = tensor("op_820_cast_fp16")]; + tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_538_cast_fp16, var_771_cast_fp16))[name = tensor("op_822_cast_fp16")]; + tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_542_cast_fp16, var_772_cast_fp16))[name = tensor("op_824_cast_fp16")]; + tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_546_cast_fp16, var_773_cast_fp16))[name = tensor("op_826_cast_fp16")]; + tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_550_cast_fp16, var_774_cast_fp16))[name = tensor("op_828_cast_fp16")]; + tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_554_cast_fp16, var_775_cast_fp16))[name = tensor("op_830_cast_fp16")]; + tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_558_cast_fp16, var_776_cast_fp16))[name = tensor("op_832_cast_fp16")]; + tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_562_cast_fp16, var_777_cast_fp16))[name = tensor("op_834_cast_fp16")]; + tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_566_cast_fp16, var_778_cast_fp16))[name = tensor("op_836_cast_fp16")]; + tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_570_cast_fp16, var_779_cast_fp16))[name = tensor("op_838_cast_fp16")]; + tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_574_cast_fp16, var_780_cast_fp16))[name = tensor("op_840_cast_fp16")]; + tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_578_cast_fp16, var_781_cast_fp16))[name = tensor("op_842_cast_fp16")]; + tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_582_cast_fp16, var_782_cast_fp16))[name = tensor("op_844_cast_fp16")]; + tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_586_cast_fp16, var_783_cast_fp16))[name = tensor("op_846_cast_fp16")]; + tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_590_cast_fp16, var_784_cast_fp16))[name = tensor("op_848_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_64, interleave = x_11_interleave_0, values = (var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_853 = const()[name = tensor("op_853"), val = tensor([1, 4096, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_853, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_861_pad_type_0 = const()[name = tensor("op_861_pad_type_0"), val = tensor("custom")]; + tensor var_861_pad_0 = const()[name = tensor("op_861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_861_cast_fp16 = conv(dilations = var_859, groups = var_64, pad = var_861_pad_0, pad_type = var_861_pad_type_0, strides = var_857, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_861_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303600064)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_861_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608320)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_64, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_67, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_886_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608512)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_898 = const()[name = tensor("op_898"), val = tensor([1, 1])]; + tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, 1])]; + tensor var_902_pad_type_0 = const()[name = tensor("op_902_pad_type_0"), val = tensor("custom")]; + tensor var_902_pad_0 = const()[name = tensor("op_902_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_902_cast_fp16 = conv(dilations = var_900, groups = var_64, pad = var_902_pad_0, pad_type = var_902_pad_type_0, strides = var_898, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_902_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616768)))]; + tensor input_7_cast_fp16 = mul(x = var_902_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_906 = const()[name = tensor("op_906"), val = tensor([1, 1])]; + tensor var_908 = const()[name = tensor("op_908"), val = tensor([1, 1])]; + tensor var_910_pad_type_0 = const()[name = tensor("op_910_pad_type_0"), val = tensor("custom")]; + tensor var_910_pad_0 = const()[name = tensor("op_910_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_910_cast_fp16 = conv(dilations = var_908, groups = var_64, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_906, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_5_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638848)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_910_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_912_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_912_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_912_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_916 = const()[name = tensor("op_916"), val = tensor([1, 1])]; + tensor var_918 = const()[name = tensor("op_918"), val = tensor([1, 1])]; + tensor var_920_pad_type_0 = const()[name = tensor("op_920_pad_type_0"), val = tensor("custom")]; + tensor var_920_pad_0 = const()[name = tensor("op_920_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_cast_fp16 = conv(dilations = var_918, groups = var_64, pad = var_920_pad_0, pad_type = var_920_pad_type_0, strides = var_916, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660928)))]; + tensor var_921_cast_fp16 = mul(x = var_920_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_921_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_921_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_927 = const()[name = tensor("op_927"), val = tensor(-1)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(-2)]; + tensor var_933 = const()[name = tensor("op_933"), val = tensor(-3)]; + tensor var_974 = const()[name = tensor("op_974"), val = tensor(1)]; + tensor var_977 = const()[name = tensor("op_977"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669184)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_974, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_977, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = tensor("op_1000_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_1000_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303669376)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 4096, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_1025, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor([1, 1])]; + tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 1])]; + tensor var_1033_pad_type_0 = const()[name = tensor("op_1033_pad_type_0"), val = tensor("custom")]; + tensor var_1033_pad_0 = const()[name = tensor("op_1033_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1033_cast_fp16 = conv(dilations = var_1031, groups = var_974, pad = var_1033_pad_0, pad_type = var_1033_pad_type_0, strides = var_1029, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1033_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677632)))]; + tensor q_9_cast_fp16 = mul(x = var_1033_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_1037 = const()[name = tensor("op_1037"), val = tensor([1, 1])]; + tensor var_1039 = const()[name = tensor("op_1039"), val = tensor([1, 1])]; + tensor var_1041_pad_type_0 = const()[name = tensor("op_1041_pad_type_0"), val = tensor("custom")]; + tensor var_1041_pad_0 = const()[name = tensor("op_1041_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1041_cast_fp16 = conv(dilations = var_1039, groups = var_974, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1037, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1041_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685888)))]; + tensor k_11_cast_fp16 = mul(x = var_1041_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 1])]; + tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1, 1])]; + tensor var_1049_pad_type_0 = const()[name = tensor("op_1049_pad_type_0"), val = tensor("custom")]; + tensor var_1049_pad_0 = const()[name = tensor("op_1049_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1049_cast_fp16 = conv(dilations = var_1047, groups = var_974, pad = var_1049_pad_0, pad_type = var_1049_pad_type_0, strides = var_1045, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303694144)))]; + tensor v_11_cast_fp16 = mul(x = var_1049_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_1051 = const()[name = tensor("op_1051"), val = tensor([1, 32, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_1051, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_1053 = const()[name = tensor("op_1053"), val = tensor([1, 32, 128, 64])]; + tensor k_13_cast_fp16 = reshape(shape = var_1053, x = k_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1067_cast_fp16")]; + tensor var_1073_begin_0 = const()[name = tensor("op_1073_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1073_end_0 = const()[name = tensor("op_1073_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1073_end_mask_0 = const()[name = tensor("op_1073_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1073_cast_fp16 = slice_by_index(begin = var_1073_begin_0, end = var_1073_end_0, end_mask = var_1073_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_1073_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1075_cast_fp16 = mul(x = var_1073_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_1075_cast_fp16")]; tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; - tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; - tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; - tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; - tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; - tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; - tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_5_cast_fp16 = concat(axis = var_931, interleave = rotated_5_interleave_0, values = (var_1075_cast_fp16, var_1067_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_1078_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_1078_cast_fp16")]; + tensor var_1079_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_1079_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_1078_cast_fp16, y = var_1079_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_1092_begin_0 = const()[name = tensor("op_1092_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1092_end_0 = const()[name = tensor("op_1092_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1092_end_mask_0 = const()[name = tensor("op_1092_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1092_cast_fp16 = slice_by_index(begin = var_1092_begin_0, end = var_1092_end_0, end_mask = var_1092_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1092_cast_fp16")]; + tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_1098_cast_fp16")]; + tensor const_34_promoted_to_fp16 = const()[name = tensor("const_34_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1100_cast_fp16 = mul(x = var_1098_cast_fp16, y = const_34_promoted_to_fp16)[name = tensor("op_1100_cast_fp16")]; tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; - tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; - tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; - tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; - tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; - tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; - tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; - tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; - tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; - tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; - tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; - tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; - tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; - tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; - tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; - tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; - tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; - tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; - tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; - tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; - tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; - tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; - tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; - tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; - tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; - tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; - tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; - tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; - tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; - tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; - tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; - tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; - tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; - tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; - tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; - tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; - tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; - tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; - tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; - tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; - tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; - tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; - tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; - tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; - tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; - tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; - tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; - tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; - tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; - tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; - tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; - tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; - tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; - tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; - tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; - tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; - tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; - tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; - tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; - tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; - tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; - tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; - tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; - tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; - tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; - tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; - tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; - tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; - tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; - tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; - tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; - tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; - tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; - tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; - tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; - tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; - tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; - tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; - tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; - tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; - tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; - tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; - tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; - tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; - tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; - tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; - tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; - tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; - tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; - tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_7_cast_fp16 = concat(axis = var_931, interleave = rotated_7_interleave_0, values = (var_1100_cast_fp16, var_1092_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_1103_cast_fp16 = mul(x = k_13_cast_fp16, y = cos)[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_1104_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_1103_cast_fp16, y = var_1104_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor var_1107 = const()[name = tensor("op_1107"), val = tensor([1, 4096, 1, 64])]; + tensor var_1108_cast_fp16 = reshape(shape = var_1107, x = roped_7_cast_fp16)[name = tensor("op_1108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_1110, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_19_interleave_0 = const()[name = tensor("k_19_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_17_perm_0, x = var_1108_cast_fp16)[name = tensor("transpose_1")]; + tensor k_19_cast_fp16 = concat(axis = var_933, interleave = k_19_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_19_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_927, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_1117 = const()[name = tensor("op_1117"), val = tensor([1, 4096, 1, -1])]; + tensor q_15_cast_fp16 = reshape(shape = var_1117, x = roped_5_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1122_cast_fp16")]; + tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1126_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1130_cast_fp16")]; + tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1138_cast_fp16")]; + tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1142_cast_fp16")]; + tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1150_cast_fp16")]; + tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1154_cast_fp16")]; + tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1158_cast_fp16")]; + tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1162_cast_fp16")]; + tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1166_cast_fp16")]; + tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1170_cast_fp16")]; + tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1174_cast_fp16")]; + tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1178_cast_fp16")]; + tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1182_cast_fp16")]; + tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1186_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1190_cast_fp16")]; + tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1194_cast_fp16")]; + tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1238_begin_0 = const()[name = tensor("op_1238_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1238_end_0 = const()[name = tensor("op_1238_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_1238_end_mask_0 = const()[name = tensor("op_1238_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1238_cast_fp16 = slice_by_index(begin = var_1238_begin_0, end = var_1238_end_0, end_mask = var_1238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1252_cast_fp16")]; + tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1256_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1264_cast_fp16")]; + tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1272_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor var_1284_begin_0 = const()[name = tensor("op_1284_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_1284_end_0 = const()[name = tensor("op_1284_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_1284_end_mask_0 = const()[name = tensor("op_1284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1284_cast_fp16 = slice_by_index(begin = var_1284_begin_0, end = var_1284_end_0, end_mask = var_1284_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1284_cast_fp16")]; + tensor var_1288_begin_0 = const()[name = tensor("op_1288_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_1288_end_0 = const()[name = tensor("op_1288_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_1288_end_mask_0 = const()[name = tensor("op_1288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1288_cast_fp16 = slice_by_index(begin = var_1288_begin_0, end = var_1288_end_0, end_mask = var_1288_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1288_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor var_1296_begin_0 = const()[name = tensor("op_1296_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_1296_end_0 = const()[name = tensor("op_1296_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_1296_end_mask_0 = const()[name = tensor("op_1296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1296_cast_fp16 = slice_by_index(begin = var_1296_begin_0, end = var_1296_end_0, end_mask = var_1296_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1296_cast_fp16")]; + tensor var_1300_begin_0 = const()[name = tensor("op_1300_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_1300_end_0 = const()[name = tensor("op_1300_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_1300_end_mask_0 = const()[name = tensor("op_1300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1300_cast_fp16")]; + tensor var_1304_begin_0 = const()[name = tensor("op_1304_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_1304_end_0 = const()[name = tensor("op_1304_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_1304_end_mask_0 = const()[name = tensor("op_1304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = tensor("op_1312_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_1312_end_0 = const()[name = tensor("op_1312_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_1312_end_mask_0 = const()[name = tensor("op_1312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = var_1312_end_0, end_mask = var_1312_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1316_begin_0 = const()[name = tensor("op_1316_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_1316_end_0 = const()[name = tensor("op_1316_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_1316_end_mask_0 = const()[name = tensor("op_1316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1316_cast_fp16 = slice_by_index(begin = var_1316_begin_0, end = var_1316_end_0, end_mask = var_1316_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1316_cast_fp16")]; + tensor var_1320_begin_0 = const()[name = tensor("op_1320_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_1320_end_0 = const()[name = tensor("op_1320_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_1320_end_mask_0 = const()[name = tensor("op_1320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1320_cast_fp16 = slice_by_index(begin = var_1320_begin_0, end = var_1320_end_0, end_mask = var_1320_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1328_begin_0 = const()[name = tensor("op_1328_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_1328_end_0 = const()[name = tensor("op_1328_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_1328_end_mask_0 = const()[name = tensor("op_1328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1328_cast_fp16 = slice_by_index(begin = var_1328_begin_0, end = var_1328_end_0, end_mask = var_1328_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1332_begin_0 = const()[name = tensor("op_1332_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_1332_end_0 = const()[name = tensor("op_1332_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_1332_end_mask_0 = const()[name = tensor("op_1332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1332_cast_fp16 = slice_by_index(begin = var_1332_begin_0, end = var_1332_end_0, end_mask = var_1332_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1336_begin_0 = const()[name = tensor("op_1336_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_1336_end_0 = const()[name = tensor("op_1336_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_1336_end_mask_0 = const()[name = tensor("op_1336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1340_begin_0 = const()[name = tensor("op_1340_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_1340_end_0 = const()[name = tensor("op_1340_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_1340_end_mask_0 = const()[name = tensor("op_1340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1340_cast_fp16 = slice_by_index(begin = var_1340_begin_0, end = var_1340_end_0, end_mask = var_1340_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1344_begin_0 = const()[name = tensor("op_1344_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_1344_end_0 = const()[name = tensor("op_1344_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_1344_end_mask_0 = const()[name = tensor("op_1344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1348_begin_0 = const()[name = tensor("op_1348_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_1348_end_0 = const()[name = tensor("op_1348_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_1348_end_mask_0 = const()[name = tensor("op_1348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1348_cast_fp16 = slice_by_index(begin = var_1348_begin_0, end = var_1348_end_0, end_mask = var_1348_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1352_begin_0 = const()[name = tensor("op_1352_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_1352_end_0 = const()[name = tensor("op_1352_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_1352_end_mask_0 = const()[name = tensor("op_1352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1352_cast_fp16 = slice_by_index(begin = var_1352_begin_0, end = var_1352_end_0, end_mask = var_1352_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1352_cast_fp16")]; + tensor var_1356_begin_0 = const()[name = tensor("op_1356_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_1356_end_0 = const()[name = tensor("op_1356_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_1356_end_mask_0 = const()[name = tensor("op_1356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = var_1356_end_0, end_mask = var_1356_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1356_cast_fp16")]; + tensor var_1360_begin_0 = const()[name = tensor("op_1360_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_1360_end_0 = const()[name = tensor("op_1360_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_1360_end_mask_0 = const()[name = tensor("op_1360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1360_cast_fp16 = slice_by_index(begin = var_1360_begin_0, end = var_1360_end_0, end_mask = var_1360_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1360_cast_fp16")]; + tensor var_1364_begin_0 = const()[name = tensor("op_1364_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_1364_end_0 = const()[name = tensor("op_1364_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_1364_end_mask_0 = const()[name = tensor("op_1364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1364_cast_fp16 = slice_by_index(begin = var_1364_begin_0, end = var_1364_end_0, end_mask = var_1364_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1364_cast_fp16")]; + tensor var_1368_begin_0 = const()[name = tensor("op_1368_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_1368_end_0 = const()[name = tensor("op_1368_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_1368_end_mask_0 = const()[name = tensor("op_1368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1368_cast_fp16 = slice_by_index(begin = var_1368_begin_0, end = var_1368_end_0, end_mask = var_1368_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1368_cast_fp16")]; + tensor var_1372_begin_0 = const()[name = tensor("op_1372_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_1372_end_0 = const()[name = tensor("op_1372_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_1372_end_mask_0 = const()[name = tensor("op_1372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1372_cast_fp16 = slice_by_index(begin = var_1372_begin_0, end = var_1372_end_0, end_mask = var_1372_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1372_cast_fp16")]; + tensor var_1376_begin_0 = const()[name = tensor("op_1376_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_1376_end_0 = const()[name = tensor("op_1376_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_1376_end_mask_0 = const()[name = tensor("op_1376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1376_cast_fp16 = slice_by_index(begin = var_1376_begin_0, end = var_1376_end_0, end_mask = var_1376_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_begin_0 = const()[name = tensor("op_1378_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1378_end_0 = const()[name = tensor("op_1378_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1378_end_mask_0 = const()[name = tensor("op_1378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1378_cast_fp16 = slice_by_index(begin = var_1378_begin_0, end = var_1378_end_0, end_mask = var_1378_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = tensor("op_1382_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1382_end_0 = const()[name = tensor("op_1382_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1382_end_mask_0 = const()[name = tensor("op_1382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = var_1382_end_0, end_mask = var_1382_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1386_begin_0 = const()[name = tensor("op_1386_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1386_end_0 = const()[name = tensor("op_1386_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1386_end_mask_0 = const()[name = tensor("op_1386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1386_cast_fp16 = slice_by_index(begin = var_1386_begin_0, end = var_1386_end_0, end_mask = var_1386_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1390_begin_0 = const()[name = tensor("op_1390_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1390_end_0 = const()[name = tensor("op_1390_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1390_end_mask_0 = const()[name = tensor("op_1390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1390_cast_fp16 = slice_by_index(begin = var_1390_begin_0, end = var_1390_end_0, end_mask = var_1390_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1394_begin_0 = const()[name = tensor("op_1394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1394_end_0 = const()[name = tensor("op_1394_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1394_end_mask_0 = const()[name = tensor("op_1394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1394_cast_fp16 = slice_by_index(begin = var_1394_begin_0, end = var_1394_end_0, end_mask = var_1394_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1398_begin_0 = const()[name = tensor("op_1398_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1398_end_0 = const()[name = tensor("op_1398_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1398_end_mask_0 = const()[name = tensor("op_1398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1398_cast_fp16 = slice_by_index(begin = var_1398_begin_0, end = var_1398_end_0, end_mask = var_1398_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1402_begin_0 = const()[name = tensor("op_1402_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1402_end_0 = const()[name = tensor("op_1402_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1402_end_mask_0 = const()[name = tensor("op_1402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1402_cast_fp16 = slice_by_index(begin = var_1402_begin_0, end = var_1402_end_0, end_mask = var_1402_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1406_begin_0 = const()[name = tensor("op_1406_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1406_end_0 = const()[name = tensor("op_1406_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1406_end_mask_0 = const()[name = tensor("op_1406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1406_cast_fp16 = slice_by_index(begin = var_1406_begin_0, end = var_1406_end_0, end_mask = var_1406_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1410_begin_0 = const()[name = tensor("op_1410_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1410_end_0 = const()[name = tensor("op_1410_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_1410_end_mask_0 = const()[name = tensor("op_1410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1410_cast_fp16 = slice_by_index(begin = var_1410_begin_0, end = var_1410_end_0, end_mask = var_1410_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1414_begin_0 = const()[name = tensor("op_1414_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1414_end_0 = const()[name = tensor("op_1414_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_1414_end_mask_0 = const()[name = tensor("op_1414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1414_cast_fp16 = slice_by_index(begin = var_1414_begin_0, end = var_1414_end_0, end_mask = var_1414_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1418_begin_0 = const()[name = tensor("op_1418_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_1418_end_0 = const()[name = tensor("op_1418_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_1418_end_mask_0 = const()[name = tensor("op_1418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1418_cast_fp16 = slice_by_index(begin = var_1418_begin_0, end = var_1418_end_0, end_mask = var_1418_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1422_begin_0 = const()[name = tensor("op_1422_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_1422_end_0 = const()[name = tensor("op_1422_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_1422_end_mask_0 = const()[name = tensor("op_1422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = var_1422_end_0, end_mask = var_1422_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1426_begin_0 = const()[name = tensor("op_1426_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_1426_end_0 = const()[name = tensor("op_1426_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_1426_end_mask_0 = const()[name = tensor("op_1426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1426_cast_fp16 = slice_by_index(begin = var_1426_begin_0, end = var_1426_end_0, end_mask = var_1426_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1430_begin_0 = const()[name = tensor("op_1430_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_1430_end_0 = const()[name = tensor("op_1430_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_1430_end_mask_0 = const()[name = tensor("op_1430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1430_cast_fp16 = slice_by_index(begin = var_1430_begin_0, end = var_1430_end_0, end_mask = var_1430_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1434_begin_0 = const()[name = tensor("op_1434_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_1434_end_0 = const()[name = tensor("op_1434_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_1434_end_mask_0 = const()[name = tensor("op_1434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1434_cast_fp16 = slice_by_index(begin = var_1434_begin_0, end = var_1434_end_0, end_mask = var_1434_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1438_begin_0 = const()[name = tensor("op_1438_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_1438_end_0 = const()[name = tensor("op_1438_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_1438_end_mask_0 = const()[name = tensor("op_1438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1438_cast_fp16 = slice_by_index(begin = var_1438_begin_0, end = var_1438_end_0, end_mask = var_1438_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1442_begin_0 = const()[name = tensor("op_1442_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_1442_end_0 = const()[name = tensor("op_1442_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_1442_end_mask_0 = const()[name = tensor("op_1442_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1442_cast_fp16 = slice_by_index(begin = var_1442_begin_0, end = var_1442_end_0, end_mask = var_1442_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1478_cast_fp16")]; + tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1482_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1490_cast_fp16")]; + tensor var_1494_begin_0 = const()[name = tensor("op_1494_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_1494_end_0 = const()[name = tensor("op_1494_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_1494_end_mask_0 = const()[name = tensor("op_1494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1494_cast_fp16 = slice_by_index(begin = var_1494_begin_0, end = var_1494_end_0, end_mask = var_1494_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1494_cast_fp16")]; + tensor var_1498_begin_0 = const()[name = tensor("op_1498_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_1498_end_0 = const()[name = tensor("op_1498_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_1498_end_mask_0 = const()[name = tensor("op_1498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1498_cast_fp16 = slice_by_index(begin = var_1498_begin_0, end = var_1498_end_0, end_mask = var_1498_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; + tensor var_1502_begin_0 = const()[name = tensor("op_1502_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_1502_end_0 = const()[name = tensor("op_1502_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_1502_end_mask_0 = const()[name = tensor("op_1502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = var_1502_end_0, end_mask = var_1502_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; + tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1252_cast_fp16, var_1122_cast_fp16))[name = tensor("op_1506_cast_fp16")]; + tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1508_cast_fp16 = mul(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = tensor("op_1508_cast_fp16")]; + tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1256_cast_fp16, var_1126_cast_fp16))[name = tensor("op_1510_cast_fp16")]; + tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1512_cast_fp16 = mul(x = var_1510_cast_fp16, y = var_1511_to_fp16)[name = tensor("op_1512_cast_fp16")]; + tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1260_cast_fp16, var_1130_cast_fp16))[name = tensor("op_1514_cast_fp16")]; + tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1516_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("op_1516_cast_fp16")]; + tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1264_cast_fp16, var_1134_cast_fp16))[name = tensor("op_1518_cast_fp16")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1520_cast_fp16 = mul(x = var_1518_cast_fp16, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; + tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1268_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1522_cast_fp16")]; + tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1524_cast_fp16 = mul(x = var_1522_cast_fp16, y = var_1523_to_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1272_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1526_cast_fp16")]; + tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1528_cast_fp16 = mul(x = var_1526_cast_fp16, y = var_1527_to_fp16)[name = tensor("op_1528_cast_fp16")]; + tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1276_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1530_cast_fp16")]; + tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1532_cast_fp16 = mul(x = var_1530_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1280_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1534_cast_fp16")]; + tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1536_cast_fp16 = mul(x = var_1534_cast_fp16, y = var_1535_to_fp16)[name = tensor("op_1536_cast_fp16")]; + tensor var_1538_equation_0 = const()[name = tensor("op_1538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1538_cast_fp16 = einsum(equation = var_1538_equation_0, values = (var_1284_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1538_cast_fp16")]; + tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1540_cast_fp16 = mul(x = var_1538_cast_fp16, y = var_1539_to_fp16)[name = tensor("op_1540_cast_fp16")]; + tensor var_1542_equation_0 = const()[name = tensor("op_1542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1542_cast_fp16 = einsum(equation = var_1542_equation_0, values = (var_1288_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1542_cast_fp16")]; + tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1544_cast_fp16 = mul(x = var_1542_cast_fp16, y = var_1543_to_fp16)[name = tensor("op_1544_cast_fp16")]; + tensor var_1546_equation_0 = const()[name = tensor("op_1546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1546_cast_fp16 = einsum(equation = var_1546_equation_0, values = (var_1292_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1546_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1548_cast_fp16 = mul(x = var_1546_cast_fp16, y = var_1547_to_fp16)[name = tensor("op_1548_cast_fp16")]; + tensor var_1550_equation_0 = const()[name = tensor("op_1550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1550_cast_fp16 = einsum(equation = var_1550_equation_0, values = (var_1296_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1550_cast_fp16")]; + tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1552_cast_fp16 = mul(x = var_1550_cast_fp16, y = var_1551_to_fp16)[name = tensor("op_1552_cast_fp16")]; + tensor var_1554_equation_0 = const()[name = tensor("op_1554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1554_cast_fp16 = einsum(equation = var_1554_equation_0, values = (var_1300_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1554_cast_fp16")]; + tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1556_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1555_to_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_equation_0 = const()[name = tensor("op_1558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1558_cast_fp16 = einsum(equation = var_1558_equation_0, values = (var_1304_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1558_cast_fp16")]; + tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1560_cast_fp16 = mul(x = var_1558_cast_fp16, y = var_1559_to_fp16)[name = tensor("op_1560_cast_fp16")]; + tensor var_1562_equation_0 = const()[name = tensor("op_1562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1562_cast_fp16 = einsum(equation = var_1562_equation_0, values = (var_1308_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1562_cast_fp16")]; + tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1564_cast_fp16 = mul(x = var_1562_cast_fp16, y = var_1563_to_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor var_1566_equation_0 = const()[name = tensor("op_1566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1566_cast_fp16 = einsum(equation = var_1566_equation_0, values = (var_1312_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1566_cast_fp16")]; + tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1568_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor var_1570_equation_0 = const()[name = tensor("op_1570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1570_cast_fp16 = einsum(equation = var_1570_equation_0, values = (var_1316_cast_fp16, var_1186_cast_fp16))[name = tensor("op_1570_cast_fp16")]; + tensor var_1571_to_fp16 = const()[name = tensor("op_1571_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1572_cast_fp16 = mul(x = var_1570_cast_fp16, y = var_1571_to_fp16)[name = tensor("op_1572_cast_fp16")]; + tensor var_1574_equation_0 = const()[name = tensor("op_1574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1574_cast_fp16 = einsum(equation = var_1574_equation_0, values = (var_1320_cast_fp16, var_1190_cast_fp16))[name = tensor("op_1574_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = tensor("op_1575_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1576_cast_fp16 = mul(x = var_1574_cast_fp16, y = var_1575_to_fp16)[name = tensor("op_1576_cast_fp16")]; + tensor var_1578_equation_0 = const()[name = tensor("op_1578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1578_cast_fp16 = einsum(equation = var_1578_equation_0, values = (var_1324_cast_fp16, var_1194_cast_fp16))[name = tensor("op_1578_cast_fp16")]; + tensor var_1579_to_fp16 = const()[name = tensor("op_1579_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1580_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = tensor("op_1580_cast_fp16")]; + tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1328_cast_fp16, var_1198_cast_fp16))[name = tensor("op_1582_cast_fp16")]; + tensor var_1583_to_fp16 = const()[name = tensor("op_1583_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1584_cast_fp16 = mul(x = var_1582_cast_fp16, y = var_1583_to_fp16)[name = tensor("op_1584_cast_fp16")]; + tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1332_cast_fp16, var_1202_cast_fp16))[name = tensor("op_1586_cast_fp16")]; + tensor var_1587_to_fp16 = const()[name = tensor("op_1587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1588_cast_fp16 = mul(x = var_1586_cast_fp16, y = var_1587_to_fp16)[name = tensor("op_1588_cast_fp16")]; + tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1336_cast_fp16, var_1206_cast_fp16))[name = tensor("op_1590_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1592_cast_fp16 = mul(x = var_1590_cast_fp16, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1340_cast_fp16, var_1210_cast_fp16))[name = tensor("op_1594_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = tensor("op_1595_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1596_cast_fp16 = mul(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = tensor("op_1596_cast_fp16")]; + tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1344_cast_fp16, var_1214_cast_fp16))[name = tensor("op_1598_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1600_cast_fp16 = mul(x = var_1598_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1348_cast_fp16, var_1218_cast_fp16))[name = tensor("op_1602_cast_fp16")]; + tensor var_1603_to_fp16 = const()[name = tensor("op_1603_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1604_cast_fp16 = mul(x = var_1602_cast_fp16, y = var_1603_to_fp16)[name = tensor("op_1604_cast_fp16")]; + tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1352_cast_fp16, var_1222_cast_fp16))[name = tensor("op_1606_cast_fp16")]; + tensor var_1607_to_fp16 = const()[name = tensor("op_1607_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1608_cast_fp16 = mul(x = var_1606_cast_fp16, y = var_1607_to_fp16)[name = tensor("op_1608_cast_fp16")]; + tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1356_cast_fp16, var_1226_cast_fp16))[name = tensor("op_1610_cast_fp16")]; + tensor var_1611_to_fp16 = const()[name = tensor("op_1611_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1612_cast_fp16 = mul(x = var_1610_cast_fp16, y = var_1611_to_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1360_cast_fp16, var_1230_cast_fp16))[name = tensor("op_1614_cast_fp16")]; + tensor var_1615_to_fp16 = const()[name = tensor("op_1615_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1616_cast_fp16 = mul(x = var_1614_cast_fp16, y = var_1615_to_fp16)[name = tensor("op_1616_cast_fp16")]; + tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1364_cast_fp16, var_1234_cast_fp16))[name = tensor("op_1618_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = tensor("op_1619_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1620_cast_fp16 = mul(x = var_1618_cast_fp16, y = var_1619_to_fp16)[name = tensor("op_1620_cast_fp16")]; + tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1368_cast_fp16, var_1238_cast_fp16))[name = tensor("op_1622_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = tensor("op_1623_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1624_cast_fp16 = mul(x = var_1622_cast_fp16, y = var_1623_to_fp16)[name = tensor("op_1624_cast_fp16")]; + tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1372_cast_fp16, var_1242_cast_fp16))[name = tensor("op_1626_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1628_cast_fp16 = mul(x = var_1626_cast_fp16, y = var_1627_to_fp16)[name = tensor("op_1628_cast_fp16")]; + tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1376_cast_fp16, var_1246_cast_fp16))[name = tensor("op_1630_cast_fp16")]; + tensor var_1631_to_fp16 = const()[name = tensor("op_1631_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1632_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1631_to_fp16)[name = tensor("op_1632_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1508_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1512_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1516_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1520_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1524_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1528_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1532_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1536_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1540_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1544_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1548_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1552_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1556_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1560_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1564_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_95_cast_fp16 = add(x = var_1568_cast_fp16, y = mask)[name = tensor("aw_95_cast_fp16")]; + tensor aw_97_cast_fp16 = add(x = var_1572_cast_fp16, y = mask)[name = tensor("aw_97_cast_fp16")]; + tensor aw_99_cast_fp16 = add(x = var_1576_cast_fp16, y = mask)[name = tensor("aw_99_cast_fp16")]; + tensor aw_101_cast_fp16 = add(x = var_1580_cast_fp16, y = mask)[name = tensor("aw_101_cast_fp16")]; + tensor aw_103_cast_fp16 = add(x = var_1584_cast_fp16, y = mask)[name = tensor("aw_103_cast_fp16")]; + tensor aw_105_cast_fp16 = add(x = var_1588_cast_fp16, y = mask)[name = tensor("aw_105_cast_fp16")]; + tensor aw_107_cast_fp16 = add(x = var_1592_cast_fp16, y = mask)[name = tensor("aw_107_cast_fp16")]; + tensor aw_109_cast_fp16 = add(x = var_1596_cast_fp16, y = mask)[name = tensor("aw_109_cast_fp16")]; + tensor aw_111_cast_fp16 = add(x = var_1600_cast_fp16, y = mask)[name = tensor("aw_111_cast_fp16")]; + tensor aw_113_cast_fp16 = add(x = var_1604_cast_fp16, y = mask)[name = tensor("aw_113_cast_fp16")]; + tensor aw_115_cast_fp16 = add(x = var_1608_cast_fp16, y = mask)[name = tensor("aw_115_cast_fp16")]; + tensor aw_117_cast_fp16 = add(x = var_1612_cast_fp16, y = mask)[name = tensor("aw_117_cast_fp16")]; + tensor aw_119_cast_fp16 = add(x = var_1616_cast_fp16, y = mask)[name = tensor("aw_119_cast_fp16")]; + tensor aw_121_cast_fp16 = add(x = var_1620_cast_fp16, y = mask)[name = tensor("aw_121_cast_fp16")]; + tensor aw_123_cast_fp16 = add(x = var_1624_cast_fp16, y = mask)[name = tensor("aw_123_cast_fp16")]; + tensor aw_125_cast_fp16 = add(x = var_1628_cast_fp16, y = mask)[name = tensor("aw_125_cast_fp16")]; + tensor aw_127_cast_fp16 = add(x = var_1632_cast_fp16, y = mask)[name = tensor("aw_127_cast_fp16")]; + tensor var_1665_cast_fp16 = softmax(axis = var_974, x = aw_65_cast_fp16)[name = tensor("op_1665_cast_fp16")]; + tensor var_1666_cast_fp16 = softmax(axis = var_974, x = aw_67_cast_fp16)[name = tensor("op_1666_cast_fp16")]; + tensor var_1667_cast_fp16 = softmax(axis = var_974, x = aw_69_cast_fp16)[name = tensor("op_1667_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_974, x = aw_71_cast_fp16)[name = tensor("op_1668_cast_fp16")]; + tensor var_1669_cast_fp16 = softmax(axis = var_974, x = aw_73_cast_fp16)[name = tensor("op_1669_cast_fp16")]; + tensor var_1670_cast_fp16 = softmax(axis = var_974, x = aw_75_cast_fp16)[name = tensor("op_1670_cast_fp16")]; + tensor var_1671_cast_fp16 = softmax(axis = var_974, x = aw_77_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_974, x = aw_79_cast_fp16)[name = tensor("op_1672_cast_fp16")]; + tensor var_1673_cast_fp16 = softmax(axis = var_974, x = aw_81_cast_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor var_1674_cast_fp16 = softmax(axis = var_974, x = aw_83_cast_fp16)[name = tensor("op_1674_cast_fp16")]; + tensor var_1675_cast_fp16 = softmax(axis = var_974, x = aw_85_cast_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_cast_fp16 = softmax(axis = var_974, x = aw_87_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor var_1677_cast_fp16 = softmax(axis = var_974, x = aw_89_cast_fp16)[name = tensor("op_1677_cast_fp16")]; + tensor var_1678_cast_fp16 = softmax(axis = var_974, x = aw_91_cast_fp16)[name = tensor("op_1678_cast_fp16")]; + tensor var_1679_cast_fp16 = softmax(axis = var_974, x = aw_93_cast_fp16)[name = tensor("op_1679_cast_fp16")]; + tensor var_1680_cast_fp16 = softmax(axis = var_974, x = aw_95_cast_fp16)[name = tensor("op_1680_cast_fp16")]; + tensor var_1681_cast_fp16 = softmax(axis = var_974, x = aw_97_cast_fp16)[name = tensor("op_1681_cast_fp16")]; + tensor var_1682_cast_fp16 = softmax(axis = var_974, x = aw_99_cast_fp16)[name = tensor("op_1682_cast_fp16")]; + tensor var_1683_cast_fp16 = softmax(axis = var_974, x = aw_101_cast_fp16)[name = tensor("op_1683_cast_fp16")]; + tensor var_1684_cast_fp16 = softmax(axis = var_974, x = aw_103_cast_fp16)[name = tensor("op_1684_cast_fp16")]; + tensor var_1685_cast_fp16 = softmax(axis = var_974, x = aw_105_cast_fp16)[name = tensor("op_1685_cast_fp16")]; + tensor var_1686_cast_fp16 = softmax(axis = var_974, x = aw_107_cast_fp16)[name = tensor("op_1686_cast_fp16")]; + tensor var_1687_cast_fp16 = softmax(axis = var_974, x = aw_109_cast_fp16)[name = tensor("op_1687_cast_fp16")]; + tensor var_1688_cast_fp16 = softmax(axis = var_974, x = aw_111_cast_fp16)[name = tensor("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = softmax(axis = var_974, x = aw_113_cast_fp16)[name = tensor("op_1689_cast_fp16")]; + tensor var_1690_cast_fp16 = softmax(axis = var_974, x = aw_115_cast_fp16)[name = tensor("op_1690_cast_fp16")]; + tensor var_1691_cast_fp16 = softmax(axis = var_974, x = aw_117_cast_fp16)[name = tensor("op_1691_cast_fp16")]; + tensor var_1692_cast_fp16 = softmax(axis = var_974, x = aw_119_cast_fp16)[name = tensor("op_1692_cast_fp16")]; + tensor var_1693_cast_fp16 = softmax(axis = var_974, x = aw_121_cast_fp16)[name = tensor("op_1693_cast_fp16")]; + tensor var_1694_cast_fp16 = softmax(axis = var_974, x = aw_123_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1695_cast_fp16 = softmax(axis = var_974, x = aw_125_cast_fp16)[name = tensor("op_1695_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_974, x = aw_127_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1378_cast_fp16, var_1665_cast_fp16))[name = tensor("op_1698_cast_fp16")]; + tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1382_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1700_cast_fp16")]; + tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1386_cast_fp16, var_1667_cast_fp16))[name = tensor("op_1702_cast_fp16")]; + tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1390_cast_fp16, var_1668_cast_fp16))[name = tensor("op_1704_cast_fp16")]; + tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1394_cast_fp16, var_1669_cast_fp16))[name = tensor("op_1706_cast_fp16")]; + tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1398_cast_fp16, var_1670_cast_fp16))[name = tensor("op_1708_cast_fp16")]; + tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1402_cast_fp16, var_1671_cast_fp16))[name = tensor("op_1710_cast_fp16")]; + tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1406_cast_fp16, var_1672_cast_fp16))[name = tensor("op_1712_cast_fp16")]; + tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1410_cast_fp16, var_1673_cast_fp16))[name = tensor("op_1714_cast_fp16")]; + tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1414_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1716_cast_fp16")]; + tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1418_cast_fp16, var_1675_cast_fp16))[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1422_cast_fp16, var_1676_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1426_cast_fp16, var_1677_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1430_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1434_cast_fp16, var_1679_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1438_cast_fp16, var_1680_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1442_cast_fp16, var_1681_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1446_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1450_cast_fp16, var_1683_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1454_cast_fp16, var_1684_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1458_cast_fp16, var_1685_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1462_cast_fp16, var_1686_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1466_cast_fp16, var_1687_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1470_cast_fp16, var_1688_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1474_cast_fp16, var_1689_cast_fp16))[name = tensor("op_1746_cast_fp16")]; + tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1478_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1748_cast_fp16")]; + tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1482_cast_fp16, var_1691_cast_fp16))[name = tensor("op_1750_cast_fp16")]; + tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1486_cast_fp16, var_1692_cast_fp16))[name = tensor("op_1752_cast_fp16")]; + tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1490_cast_fp16, var_1693_cast_fp16))[name = tensor("op_1754_cast_fp16")]; + tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1494_cast_fp16, var_1694_cast_fp16))[name = tensor("op_1756_cast_fp16")]; + tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1498_cast_fp16, var_1695_cast_fp16))[name = tensor("op_1758_cast_fp16")]; + tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1502_cast_fp16, var_1696_cast_fp16))[name = tensor("op_1760_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_974, interleave = x_27_interleave_0, values = (var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16, var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 4096, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1765, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 1])]; + tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 1])]; + tensor var_1773_pad_type_0 = const()[name = tensor("op_1773_pad_type_0"), val = tensor("custom")]; + tensor var_1773_pad_0 = const()[name = tensor("op_1773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1773_cast_fp16 = conv(dilations = var_1771, groups = var_974, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1769, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_13_cast_fp16)[name = tensor("op_1773_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303702400)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_1773_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_7_interleave_0 = const()[name = tensor("x_eps_7_interleave_0"), val = tensor(false)]; + tensor eps_chan_7_to_fp16 = const()[name = tensor("eps_chan_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710656)))]; + tensor x_eps_7_cast_fp16 = concat(axis = var_974, interleave = x_eps_7_interleave_0, values = (x_29_cast_fp16, eps_chan_7_to_fp16))[name = tensor("x_eps_7_cast_fp16")]; + tensor norm_x_7_axes_0 = const()[name = tensor("norm_x_7_axes_0"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_l2_norm(axes = norm_x_7_axes_0, keep_dims = var_977, x = x_eps_7_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_7_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1798_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710848)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor([1, 1])]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor([1, 1])]; + tensor var_1814_pad_type_0 = const()[name = tensor("op_1814_pad_type_0"), val = tensor("custom")]; + tensor var_1814_pad_0 = const()[name = tensor("op_1814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1814_cast_fp16 = conv(dilations = var_1812, groups = var_974, pad = var_1814_pad_0, pad_type = var_1814_pad_type_0, strides = var_1810, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1814_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303719104)))]; + tensor input_17_cast_fp16 = mul(x = var_1814_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1818 = const()[name = tensor("op_1818"), val = tensor([1, 1])]; + tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([1, 1])]; + tensor var_1822_pad_type_0 = const()[name = tensor("op_1822_pad_type_0"), val = tensor("custom")]; + tensor var_1822_pad_0 = const()[name = tensor("op_1822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1822_cast_fp16 = conv(dilations = var_1820, groups = var_974, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1818, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_1822_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303741184)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_1822_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_1824_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1824_cast_fp16")]; + tensor input_19_cast_fp16 = mul(x = var_1824_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_1828 = const()[name = tensor("op_1828"), val = tensor([1, 1])]; + tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; + tensor var_1832_pad_type_0 = const()[name = tensor("op_1832_pad_type_0"), val = tensor("custom")]; + tensor var_1832_pad_0 = const()[name = tensor("op_1832_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1832_cast_fp16 = conv(dilations = var_1830, groups = var_974, pad = var_1832_pad_0, pad_type = var_1832_pad_type_0, strides = var_1828, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_1832_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303763264)))]; + tensor var_1833_cast_fp16 = mul(x = var_1832_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = var_1833_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_1839 = const()[name = tensor("op_1839"), val = tensor(-1)]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor(-2)]; + tensor var_1845 = const()[name = tensor("op_1845"), val = tensor(-3)]; + tensor var_1886 = const()[name = tensor("op_1886"), val = tensor(1)]; + tensor var_1889 = const()[name = tensor("op_1889"), val = tensor(true)]; + tensor x_eps_9_interleave_0 = const()[name = tensor("x_eps_9_interleave_0"), val = tensor(false)]; + tensor eps_chan_9_to_fp16 = const()[name = tensor("eps_chan_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771520)))]; + tensor x_eps_9_cast_fp16 = concat(axis = var_1886, interleave = x_eps_9_interleave_0, values = (x_33_cast_fp16, eps_chan_9_to_fp16))[name = tensor("x_eps_9_cast_fp16")]; + tensor norm_x_9_axes_0 = const()[name = tensor("norm_x_9_axes_0"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_l2_norm(axes = norm_x_9_axes_0, keep_dims = var_1889, x = x_eps_9_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor x_normed_25_cast_fp16 = real_div(x = x_33_cast_fp16, y = norm_x_9_cast_fp16)[name = tensor("x_normed_25_cast_fp16")]; + tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_27_cast_fp16 = mul(x = x_normed_25_cast_fp16, y = var_1912_to_fp16)[name = tensor("x_normed_27_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303771712)))]; + tensor x_37_cast_fp16 = mul(x = x_normed_27_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_1937 = const()[name = tensor("op_1937"), val = tensor([1, 4096, 1, -1])]; + tensor input_21_cast_fp16 = reshape(shape = var_1937, x = x_37_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_1941 = const()[name = tensor("op_1941"), val = tensor([1, 1])]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor([1, 1])]; + tensor var_1945_pad_type_0 = const()[name = tensor("op_1945_pad_type_0"), val = tensor("custom")]; + tensor var_1945_pad_0 = const()[name = tensor("op_1945_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1945_cast_fp16 = conv(dilations = var_1943, groups = var_1886, pad = var_1945_pad_0, pad_type = var_1945_pad_type_0, strides = var_1941, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1945_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779968)))]; + tensor q_17_cast_fp16 = mul(x = var_1945_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_17_cast_fp16")]; + tensor var_1949 = const()[name = tensor("op_1949"), val = tensor([1, 1])]; + tensor var_1951 = const()[name = tensor("op_1951"), val = tensor([1, 1])]; + tensor var_1953_pad_type_0 = const()[name = tensor("op_1953_pad_type_0"), val = tensor("custom")]; + tensor var_1953_pad_0 = const()[name = tensor("op_1953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1953_cast_fp16 = conv(dilations = var_1951, groups = var_1886, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1949, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1953_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303788224)))]; + tensor k_21_cast_fp16 = mul(x = var_1953_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_21_cast_fp16")]; + tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 1])]; + tensor var_1959 = const()[name = tensor("op_1959"), val = tensor([1, 1])]; + tensor var_1961_pad_type_0 = const()[name = tensor("op_1961_pad_type_0"), val = tensor("custom")]; + tensor var_1961_pad_0 = const()[name = tensor("op_1961_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1961_cast_fp16 = conv(dilations = var_1959, groups = var_1886, pad = var_1961_pad_0, pad_type = var_1961_pad_type_0, strides = var_1957, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = input_21_cast_fp16)[name = tensor("op_1961_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303796480)))]; + tensor v_21_cast_fp16 = mul(x = var_1961_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_21_cast_fp16")]; + tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 32, 128, 64])]; + tensor q_19_cast_fp16 = reshape(shape = var_1963, x = q_17_cast_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_1965 = const()[name = tensor("op_1965"), val = tensor([1, 32, 128, 64])]; + tensor k_23_cast_fp16 = reshape(shape = var_1965, x = k_21_cast_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1979_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor const_53_promoted_to_fp16 = const()[name = tensor("const_53_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_1987_cast_fp16 = mul(x = var_1985_cast_fp16, y = const_53_promoted_to_fp16)[name = tensor("op_1987_cast_fp16")]; tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; - tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; - tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; - tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; - tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; - tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; - tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; - tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; - tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; - tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; - tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; - tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_9_cast_fp16 = concat(axis = var_1843, interleave = rotated_9_interleave_0, values = (var_1987_cast_fp16, var_1979_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_1990_cast_fp16 = mul(x = q_19_cast_fp16, y = cos)[name = tensor("op_1990_cast_fp16")]; + tensor var_1991_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_1991_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_1990_cast_fp16, y = var_1991_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2004_cast_fp16")]; + tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_2010_cast_fp16")]; + tensor const_55_promoted_to_fp16 = const()[name = tensor("const_55_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_2012_cast_fp16 = mul(x = var_2010_cast_fp16, y = const_55_promoted_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; - tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; - tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; - tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; - tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; - tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; - tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; - tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; - tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor rotated_cast_fp16 = concat(axis = var_1843, interleave = rotated_interleave_0, values = (var_2012_cast_fp16, var_2004_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_2015_cast_fp16 = mul(x = k_23_cast_fp16, y = cos)[name = tensor("op_2015_cast_fp16")]; + tensor var_2016_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_2016_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_2015_cast_fp16, y = var_2016_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_2019 = const()[name = tensor("op_2019"), val = tensor([1, 4096, 1, 64])]; + tensor var_2020_cast_fp16 = reshape(shape = var_2019, x = roped_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 4096, 1, 64])]; + tensor new_v_cache_2 = reshape(shape = var_2022, x = v_21_cast_fp16)[name = tensor("new_v_cache_2_type_fp32_cast_fp16")]; tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; - tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; - tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; - tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; - tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; - tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; - tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; - tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; - tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; - tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; - tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; - tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; - tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; - tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; - tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; - tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; - tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; - tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; - tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; - tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; - tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; - tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; - tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; - tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; - tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; - tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; - tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; - tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; - tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; - tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; - tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; - tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; - tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; - tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; - tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; - tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; - tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; - tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; - tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; - tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; - tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; - tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; - tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; - tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; - tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; - tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + tensor new_k_cache_2 = transpose(perm = k_27_perm_0, x = var_2020_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_1845, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_27_interleave_0 = const()[name = tensor("v_27_interleave_0"), val = tensor(false)]; + tensor v_27_cast_fp16 = concat(axis = var_1839, interleave = v_27_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_27_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1, 4096, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_2029, x = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = q_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = q_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = q_cast_fp16)[name = tensor("op_2042_cast_fp16")]; + tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = q_cast_fp16)[name = tensor("op_2046_cast_fp16")]; + tensor var_2050_begin_0 = const()[name = tensor("op_2050_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2050_end_0 = const()[name = tensor("op_2050_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_2050_end_mask_0 = const()[name = tensor("op_2050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2050_cast_fp16 = slice_by_index(begin = var_2050_begin_0, end = var_2050_end_0, end_mask = var_2050_end_mask_0, x = q_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = q_cast_fp16)[name = tensor("op_2054_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = q_cast_fp16)[name = tensor("op_2058_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = q_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = q_cast_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = q_cast_fp16)[name = tensor("op_2070_cast_fp16")]; + tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = q_cast_fp16)[name = tensor("op_2074_cast_fp16")]; + tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = q_cast_fp16)[name = tensor("op_2078_cast_fp16")]; + tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = q_cast_fp16)[name = tensor("op_2082_cast_fp16")]; + tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = q_cast_fp16)[name = tensor("op_2086_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = q_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = q_cast_fp16)[name = tensor("op_2094_cast_fp16")]; + tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = q_cast_fp16)[name = tensor("op_2098_cast_fp16")]; + tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = q_cast_fp16)[name = tensor("op_2102_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = q_cast_fp16)[name = tensor("op_2106_cast_fp16")]; + tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = q_cast_fp16)[name = tensor("op_2110_cast_fp16")]; + tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = q_cast_fp16)[name = tensor("op_2114_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = q_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = q_cast_fp16)[name = tensor("op_2122_cast_fp16")]; + tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = q_cast_fp16)[name = tensor("op_2126_cast_fp16")]; + tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 3200, 1, 64])]; + tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = q_cast_fp16)[name = tensor("op_2130_cast_fp16")]; + tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 3328, 1, 64])]; + tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = q_cast_fp16)[name = tensor("op_2134_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 3456, 1, 64])]; + tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = q_cast_fp16)[name = tensor("op_2138_cast_fp16")]; + tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 3584, 1, 64])]; + tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = q_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 3712, 1, 64])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = q_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 3840, 1, 64])]; + tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = q_cast_fp16)[name = tensor("op_2150_cast_fp16")]; + tensor var_2154_begin_0 = const()[name = tensor("op_2154_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2154_end_0 = const()[name = tensor("op_2154_end_0"), val = tensor([1, 3968, 1, 64])]; + tensor var_2154_end_mask_0 = const()[name = tensor("op_2154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2154_cast_fp16 = slice_by_index(begin = var_2154_begin_0, end = var_2154_end_0, end_mask = var_2154_end_mask_0, x = q_cast_fp16)[name = tensor("op_2154_cast_fp16")]; + tensor var_2158_begin_0 = const()[name = tensor("op_2158_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2158_end_0 = const()[name = tensor("op_2158_end_0"), val = tensor([1, 4096, 1, 64])]; + tensor var_2158_end_mask_0 = const()[name = tensor("op_2158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2158_cast_fp16 = slice_by_index(begin = var_2158_begin_0, end = var_2158_end_0, end_mask = var_2158_end_mask_0, x = q_cast_fp16)[name = tensor("op_2158_cast_fp16")]; + tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = k_cast_fp16)[name = tensor("op_2164_cast_fp16")]; + tensor var_2168_begin_0 = const()[name = tensor("op_2168_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2168_end_0 = const()[name = tensor("op_2168_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_2168_end_mask_0 = const()[name = tensor("op_2168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2168_cast_fp16 = slice_by_index(begin = var_2168_begin_0, end = var_2168_end_0, end_mask = var_2168_end_mask_0, x = k_cast_fp16)[name = tensor("op_2168_cast_fp16")]; + tensor var_2172_begin_0 = const()[name = tensor("op_2172_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2172_end_0 = const()[name = tensor("op_2172_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_2172_end_mask_0 = const()[name = tensor("op_2172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2172_cast_fp16 = slice_by_index(begin = var_2172_begin_0, end = var_2172_end_0, end_mask = var_2172_end_mask_0, x = k_cast_fp16)[name = tensor("op_2172_cast_fp16")]; + tensor var_2176_begin_0 = const()[name = tensor("op_2176_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2176_end_0 = const()[name = tensor("op_2176_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2176_end_mask_0 = const()[name = tensor("op_2176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = var_2176_end_0, end_mask = var_2176_end_mask_0, x = k_cast_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2180_begin_0 = const()[name = tensor("op_2180_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2180_end_0 = const()[name = tensor("op_2180_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_2180_end_mask_0 = const()[name = tensor("op_2180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2180_cast_fp16 = slice_by_index(begin = var_2180_begin_0, end = var_2180_end_0, end_mask = var_2180_end_mask_0, x = k_cast_fp16)[name = tensor("op_2180_cast_fp16")]; + tensor var_2184_begin_0 = const()[name = tensor("op_2184_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2184_end_0 = const()[name = tensor("op_2184_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_2184_end_mask_0 = const()[name = tensor("op_2184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2184_cast_fp16 = slice_by_index(begin = var_2184_begin_0, end = var_2184_end_0, end_mask = var_2184_end_mask_0, x = k_cast_fp16)[name = tensor("op_2184_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = k_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = k_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor var_2196_begin_0 = const()[name = tensor("op_2196_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2196_end_0 = const()[name = tensor("op_2196_end_0"), val = tensor([1, 512, 1, 1152])]; + tensor var_2196_end_mask_0 = const()[name = tensor("op_2196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2196_cast_fp16 = slice_by_index(begin = var_2196_begin_0, end = var_2196_end_0, end_mask = var_2196_end_mask_0, x = k_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor var_2200_begin_0 = const()[name = tensor("op_2200_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2200_end_0 = const()[name = tensor("op_2200_end_0"), val = tensor([1, 512, 1, 1280])]; + tensor var_2200_end_mask_0 = const()[name = tensor("op_2200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2200_cast_fp16 = slice_by_index(begin = var_2200_begin_0, end = var_2200_end_0, end_mask = var_2200_end_mask_0, x = k_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor var_2204_begin_0 = const()[name = tensor("op_2204_begin_0"), val = tensor([0, 0, 0, 1280])]; + tensor var_2204_end_0 = const()[name = tensor("op_2204_end_0"), val = tensor([1, 512, 1, 1408])]; + tensor var_2204_end_mask_0 = const()[name = tensor("op_2204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2204_cast_fp16 = slice_by_index(begin = var_2204_begin_0, end = var_2204_end_0, end_mask = var_2204_end_mask_0, x = k_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor var_2208_begin_0 = const()[name = tensor("op_2208_begin_0"), val = tensor([0, 0, 0, 1408])]; + tensor var_2208_end_0 = const()[name = tensor("op_2208_end_0"), val = tensor([1, 512, 1, 1536])]; + tensor var_2208_end_mask_0 = const()[name = tensor("op_2208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2208_cast_fp16 = slice_by_index(begin = var_2208_begin_0, end = var_2208_end_0, end_mask = var_2208_end_mask_0, x = k_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor var_2212_begin_0 = const()[name = tensor("op_2212_begin_0"), val = tensor([0, 0, 0, 1536])]; + tensor var_2212_end_0 = const()[name = tensor("op_2212_end_0"), val = tensor([1, 512, 1, 1664])]; + tensor var_2212_end_mask_0 = const()[name = tensor("op_2212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2212_cast_fp16 = slice_by_index(begin = var_2212_begin_0, end = var_2212_end_0, end_mask = var_2212_end_mask_0, x = k_cast_fp16)[name = tensor("op_2212_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 1664])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 512, 1, 1792])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = k_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 1792])]; + tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 512, 1, 1920])]; + tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = k_cast_fp16)[name = tensor("op_2220_cast_fp16")]; + tensor var_2224_begin_0 = const()[name = tensor("op_2224_begin_0"), val = tensor([0, 0, 0, 1920])]; + tensor var_2224_end_0 = const()[name = tensor("op_2224_end_0"), val = tensor([1, 512, 1, 2048])]; + tensor var_2224_end_mask_0 = const()[name = tensor("op_2224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2224_cast_fp16 = slice_by_index(begin = var_2224_begin_0, end = var_2224_end_0, end_mask = var_2224_end_mask_0, x = k_cast_fp16)[name = tensor("op_2224_cast_fp16")]; + tensor var_2228_begin_0 = const()[name = tensor("op_2228_begin_0"), val = tensor([0, 0, 0, 2048])]; + tensor var_2228_end_0 = const()[name = tensor("op_2228_end_0"), val = tensor([1, 512, 1, 2176])]; + tensor var_2228_end_mask_0 = const()[name = tensor("op_2228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2228_cast_fp16 = slice_by_index(begin = var_2228_begin_0, end = var_2228_end_0, end_mask = var_2228_end_mask_0, x = k_cast_fp16)[name = tensor("op_2228_cast_fp16")]; + tensor var_2232_begin_0 = const()[name = tensor("op_2232_begin_0"), val = tensor([0, 0, 0, 2176])]; + tensor var_2232_end_0 = const()[name = tensor("op_2232_end_0"), val = tensor([1, 512, 1, 2304])]; + tensor var_2232_end_mask_0 = const()[name = tensor("op_2232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2232_cast_fp16 = slice_by_index(begin = var_2232_begin_0, end = var_2232_end_0, end_mask = var_2232_end_mask_0, x = k_cast_fp16)[name = tensor("op_2232_cast_fp16")]; + tensor var_2236_begin_0 = const()[name = tensor("op_2236_begin_0"), val = tensor([0, 0, 0, 2304])]; + tensor var_2236_end_0 = const()[name = tensor("op_2236_end_0"), val = tensor([1, 512, 1, 2432])]; + tensor var_2236_end_mask_0 = const()[name = tensor("op_2236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2236_cast_fp16 = slice_by_index(begin = var_2236_begin_0, end = var_2236_end_0, end_mask = var_2236_end_mask_0, x = k_cast_fp16)[name = tensor("op_2236_cast_fp16")]; + tensor var_2240_begin_0 = const()[name = tensor("op_2240_begin_0"), val = tensor([0, 0, 0, 2432])]; + tensor var_2240_end_0 = const()[name = tensor("op_2240_end_0"), val = tensor([1, 512, 1, 2560])]; + tensor var_2240_end_mask_0 = const()[name = tensor("op_2240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2240_cast_fp16 = slice_by_index(begin = var_2240_begin_0, end = var_2240_end_0, end_mask = var_2240_end_mask_0, x = k_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 2560])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 512, 1, 2688])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = k_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 2688])]; + tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 512, 1, 2816])]; + tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = k_cast_fp16)[name = tensor("op_2248_cast_fp16")]; + tensor var_2252_begin_0 = const()[name = tensor("op_2252_begin_0"), val = tensor([0, 0, 0, 2816])]; + tensor var_2252_end_0 = const()[name = tensor("op_2252_end_0"), val = tensor([1, 512, 1, 2944])]; + tensor var_2252_end_mask_0 = const()[name = tensor("op_2252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2252_cast_fp16 = slice_by_index(begin = var_2252_begin_0, end = var_2252_end_0, end_mask = var_2252_end_mask_0, x = k_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor var_2256_begin_0 = const()[name = tensor("op_2256_begin_0"), val = tensor([0, 0, 0, 2944])]; + tensor var_2256_end_0 = const()[name = tensor("op_2256_end_0"), val = tensor([1, 512, 1, 3072])]; + tensor var_2256_end_mask_0 = const()[name = tensor("op_2256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2256_cast_fp16 = slice_by_index(begin = var_2256_begin_0, end = var_2256_end_0, end_mask = var_2256_end_mask_0, x = k_cast_fp16)[name = tensor("op_2256_cast_fp16")]; + tensor var_2260_begin_0 = const()[name = tensor("op_2260_begin_0"), val = tensor([0, 0, 0, 3072])]; + tensor var_2260_end_0 = const()[name = tensor("op_2260_end_0"), val = tensor([1, 512, 1, 3200])]; + tensor var_2260_end_mask_0 = const()[name = tensor("op_2260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2260_cast_fp16 = slice_by_index(begin = var_2260_begin_0, end = var_2260_end_0, end_mask = var_2260_end_mask_0, x = k_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = tensor("op_2264_begin_0"), val = tensor([0, 0, 0, 3200])]; + tensor var_2264_end_0 = const()[name = tensor("op_2264_end_0"), val = tensor([1, 512, 1, 3328])]; + tensor var_2264_end_mask_0 = const()[name = tensor("op_2264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, x = k_cast_fp16)[name = tensor("op_2264_cast_fp16")]; + tensor var_2268_begin_0 = const()[name = tensor("op_2268_begin_0"), val = tensor([0, 0, 0, 3328])]; + tensor var_2268_end_0 = const()[name = tensor("op_2268_end_0"), val = tensor([1, 512, 1, 3456])]; + tensor var_2268_end_mask_0 = const()[name = tensor("op_2268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2268_cast_fp16 = slice_by_index(begin = var_2268_begin_0, end = var_2268_end_0, end_mask = var_2268_end_mask_0, x = k_cast_fp16)[name = tensor("op_2268_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 3456])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 512, 1, 3584])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = k_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 3584])]; + tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 512, 1, 3712])]; + tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = k_cast_fp16)[name = tensor("op_2276_cast_fp16")]; + tensor var_2280_begin_0 = const()[name = tensor("op_2280_begin_0"), val = tensor([0, 0, 0, 3712])]; + tensor var_2280_end_0 = const()[name = tensor("op_2280_end_0"), val = tensor([1, 512, 1, 3840])]; + tensor var_2280_end_mask_0 = const()[name = tensor("op_2280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2280_cast_fp16 = slice_by_index(begin = var_2280_begin_0, end = var_2280_end_0, end_mask = var_2280_end_mask_0, x = k_cast_fp16)[name = tensor("op_2280_cast_fp16")]; + tensor var_2284_begin_0 = const()[name = tensor("op_2284_begin_0"), val = tensor([0, 0, 0, 3840])]; + tensor var_2284_end_0 = const()[name = tensor("op_2284_end_0"), val = tensor([1, 512, 1, 3968])]; + tensor var_2284_end_mask_0 = const()[name = tensor("op_2284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2284_cast_fp16 = slice_by_index(begin = var_2284_begin_0, end = var_2284_end_0, end_mask = var_2284_end_mask_0, x = k_cast_fp16)[name = tensor("op_2284_cast_fp16")]; + tensor var_2288_begin_0 = const()[name = tensor("op_2288_begin_0"), val = tensor([0, 0, 0, 3968])]; + tensor var_2288_end_0 = const()[name = tensor("op_2288_end_0"), val = tensor([1, 512, 1, 4096])]; + tensor var_2288_end_mask_0 = const()[name = tensor("op_2288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2288_cast_fp16 = slice_by_index(begin = var_2288_begin_0, end = var_2288_end_0, end_mask = var_2288_end_mask_0, x = k_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2290_cast_fp16")]; + tensor var_2294_begin_0 = const()[name = tensor("op_2294_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2294_end_0 = const()[name = tensor("op_2294_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_2294_end_mask_0 = const()[name = tensor("op_2294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2294_cast_fp16 = slice_by_index(begin = var_2294_begin_0, end = var_2294_end_0, end_mask = var_2294_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2298_begin_0 = const()[name = tensor("op_2298_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2298_end_0 = const()[name = tensor("op_2298_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2298_end_mask_0 = const()[name = tensor("op_2298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = var_2298_end_0, end_mask = var_2298_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2298_cast_fp16")]; + tensor var_2302_begin_0 = const()[name = tensor("op_2302_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2302_end_0 = const()[name = tensor("op_2302_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_2302_end_mask_0 = const()[name = tensor("op_2302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2302_cast_fp16 = slice_by_index(begin = var_2302_begin_0, end = var_2302_end_0, end_mask = var_2302_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2302_cast_fp16")]; + tensor var_2306_begin_0 = const()[name = tensor("op_2306_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2306_end_0 = const()[name = tensor("op_2306_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_2306_end_mask_0 = const()[name = tensor("op_2306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2306_cast_fp16 = slice_by_index(begin = var_2306_begin_0, end = var_2306_end_0, end_mask = var_2306_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2306_cast_fp16")]; + tensor var_2310_begin_0 = const()[name = tensor("op_2310_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2310_end_0 = const()[name = tensor("op_2310_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_2310_end_mask_0 = const()[name = tensor("op_2310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2310_cast_fp16 = slice_by_index(begin = var_2310_begin_0, end = var_2310_end_0, end_mask = var_2310_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2310_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2318_cast_fp16")]; + tensor var_2322_begin_0 = const()[name = tensor("op_2322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2322_end_0 = const()[name = tensor("op_2322_end_0"), val = tensor([1, 1152, 1, 512])]; + tensor var_2322_end_mask_0 = const()[name = tensor("op_2322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2322_cast_fp16 = slice_by_index(begin = var_2322_begin_0, end = var_2322_end_0, end_mask = var_2322_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor var_2326_begin_0 = const()[name = tensor("op_2326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2326_end_0 = const()[name = tensor("op_2326_end_0"), val = tensor([1, 1280, 1, 512])]; + tensor var_2326_end_mask_0 = const()[name = tensor("op_2326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2326_cast_fp16 = slice_by_index(begin = var_2326_begin_0, end = var_2326_end_0, end_mask = var_2326_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2326_cast_fp16")]; + tensor var_2330_begin_0 = const()[name = tensor("op_2330_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_2330_end_0 = const()[name = tensor("op_2330_end_0"), val = tensor([1, 1408, 1, 512])]; + tensor var_2330_end_mask_0 = const()[name = tensor("op_2330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2330_cast_fp16 = slice_by_index(begin = var_2330_begin_0, end = var_2330_end_0, end_mask = var_2330_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2330_cast_fp16")]; + tensor var_2334_begin_0 = const()[name = tensor("op_2334_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_2334_end_0 = const()[name = tensor("op_2334_end_0"), val = tensor([1, 1536, 1, 512])]; + tensor var_2334_end_mask_0 = const()[name = tensor("op_2334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2334_cast_fp16 = slice_by_index(begin = var_2334_begin_0, end = var_2334_end_0, end_mask = var_2334_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2334_cast_fp16")]; + tensor var_2338_begin_0 = const()[name = tensor("op_2338_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_2338_end_0 = const()[name = tensor("op_2338_end_0"), val = tensor([1, 1664, 1, 512])]; + tensor var_2338_end_mask_0 = const()[name = tensor("op_2338_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2338_cast_fp16 = slice_by_index(begin = var_2338_begin_0, end = var_2338_end_0, end_mask = var_2338_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2338_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 1792, 1, 512])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 1920, 1, 512])]; + tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2346_cast_fp16")]; + tensor var_2350_begin_0 = const()[name = tensor("op_2350_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_2350_end_0 = const()[name = tensor("op_2350_end_0"), val = tensor([1, 2048, 1, 512])]; + tensor var_2350_end_mask_0 = const()[name = tensor("op_2350_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2350_cast_fp16 = slice_by_index(begin = var_2350_begin_0, end = var_2350_end_0, end_mask = var_2350_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2354_begin_0 = const()[name = tensor("op_2354_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_2354_end_0 = const()[name = tensor("op_2354_end_0"), val = tensor([1, 2176, 1, 512])]; + tensor var_2354_end_mask_0 = const()[name = tensor("op_2354_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2354_cast_fp16 = slice_by_index(begin = var_2354_begin_0, end = var_2354_end_0, end_mask = var_2354_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2358_begin_0 = const()[name = tensor("op_2358_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_2358_end_0 = const()[name = tensor("op_2358_end_0"), val = tensor([1, 2304, 1, 512])]; + tensor var_2358_end_mask_0 = const()[name = tensor("op_2358_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2358_cast_fp16 = slice_by_index(begin = var_2358_begin_0, end = var_2358_end_0, end_mask = var_2358_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2358_cast_fp16")]; + tensor var_2362_begin_0 = const()[name = tensor("op_2362_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_2362_end_0 = const()[name = tensor("op_2362_end_0"), val = tensor([1, 2432, 1, 512])]; + tensor var_2362_end_mask_0 = const()[name = tensor("op_2362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2362_cast_fp16 = slice_by_index(begin = var_2362_begin_0, end = var_2362_end_0, end_mask = var_2362_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2362_cast_fp16")]; + tensor var_2366_begin_0 = const()[name = tensor("op_2366_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_2366_end_0 = const()[name = tensor("op_2366_end_0"), val = tensor([1, 2560, 1, 512])]; + tensor var_2366_end_mask_0 = const()[name = tensor("op_2366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2366_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 2688, 1, 512])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 2816, 1, 512])]; + tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2374_cast_fp16")]; + tensor var_2378_begin_0 = const()[name = tensor("op_2378_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_2378_end_0 = const()[name = tensor("op_2378_end_0"), val = tensor([1, 2944, 1, 512])]; + tensor var_2378_end_mask_0 = const()[name = tensor("op_2378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2378_cast_fp16")]; + tensor var_2382_begin_0 = const()[name = tensor("op_2382_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_2382_end_0 = const()[name = tensor("op_2382_end_0"), val = tensor([1, 3072, 1, 512])]; + tensor var_2382_end_mask_0 = const()[name = tensor("op_2382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2382_cast_fp16 = slice_by_index(begin = var_2382_begin_0, end = var_2382_end_0, end_mask = var_2382_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2382_cast_fp16")]; + tensor var_2386_begin_0 = const()[name = tensor("op_2386_begin_0"), val = tensor([0, 3072, 0, 0])]; + tensor var_2386_end_0 = const()[name = tensor("op_2386_end_0"), val = tensor([1, 3200, 1, 512])]; + tensor var_2386_end_mask_0 = const()[name = tensor("op_2386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = var_2386_end_0, end_mask = var_2386_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 3200, 0, 0])]; + tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([1, 3328, 1, 512])]; + tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2390_cast_fp16")]; + tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 3328, 0, 0])]; + tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([1, 3456, 1, 512])]; + tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2394_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 3456, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 3584, 1, 512])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 3584, 0, 0])]; + tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 3712, 1, 512])]; + tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2402_cast_fp16")]; + tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 3712, 0, 0])]; + tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([1, 3840, 1, 512])]; + tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2406_cast_fp16")]; + tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 3840, 0, 0])]; + tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([1, 3968, 1, 512])]; + tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2410_cast_fp16")]; + tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 3968, 0, 0])]; + tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([1, 4096, 1, 512])]; + tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_2414_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2164_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2418_cast_fp16")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2420_cast_fp16 = mul(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = tensor("op_2420_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2168_cast_fp16, var_2038_cast_fp16))[name = tensor("op_2422_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2424_cast_fp16 = mul(x = var_2422_cast_fp16, y = var_2423_to_fp16)[name = tensor("op_2424_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2172_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2426_cast_fp16")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2428_cast_fp16 = mul(x = var_2426_cast_fp16, y = var_2427_to_fp16)[name = tensor("op_2428_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2176_cast_fp16, var_2046_cast_fp16))[name = tensor("op_2430_cast_fp16")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2432_cast_fp16 = mul(x = var_2430_cast_fp16, y = var_2431_to_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2180_cast_fp16, var_2050_cast_fp16))[name = tensor("op_2434_cast_fp16")]; + tensor var_2435_to_fp16 = const()[name = tensor("op_2435_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2436_cast_fp16 = mul(x = var_2434_cast_fp16, y = var_2435_to_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2184_cast_fp16, var_2054_cast_fp16))[name = tensor("op_2438_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = tensor("op_2439_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2440_cast_fp16 = mul(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2188_cast_fp16, var_2058_cast_fp16))[name = tensor("op_2442_cast_fp16")]; + tensor var_2443_to_fp16 = const()[name = tensor("op_2443_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2444_cast_fp16 = mul(x = var_2442_cast_fp16, y = var_2443_to_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2192_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2446_cast_fp16")]; + tensor var_2447_to_fp16 = const()[name = tensor("op_2447_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2448_cast_fp16 = mul(x = var_2446_cast_fp16, y = var_2447_to_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2196_cast_fp16, var_2066_cast_fp16))[name = tensor("op_2450_cast_fp16")]; + tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2452_cast_fp16 = mul(x = var_2450_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2200_cast_fp16, var_2070_cast_fp16))[name = tensor("op_2454_cast_fp16")]; + tensor var_2455_to_fp16 = const()[name = tensor("op_2455_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2456_cast_fp16 = mul(x = var_2454_cast_fp16, y = var_2455_to_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2204_cast_fp16, var_2074_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2459_to_fp16 = const()[name = tensor("op_2459_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2460_cast_fp16 = mul(x = var_2458_cast_fp16, y = var_2459_to_fp16)[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2208_cast_fp16, var_2078_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2464_cast_fp16 = mul(x = var_2462_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2212_cast_fp16, var_2082_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = tensor("op_2467_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2468_cast_fp16 = mul(x = var_2466_cast_fp16, y = var_2467_to_fp16)[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2216_cast_fp16, var_2086_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = tensor("op_2471_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2472_cast_fp16 = mul(x = var_2470_cast_fp16, y = var_2471_to_fp16)[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2220_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = tensor("op_2475_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2476_cast_fp16 = mul(x = var_2474_cast_fp16, y = var_2475_to_fp16)[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2224_cast_fp16, var_2094_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2479_to_fp16 = const()[name = tensor("op_2479_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2480_cast_fp16 = mul(x = var_2478_cast_fp16, y = var_2479_to_fp16)[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2228_cast_fp16, var_2098_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = tensor("op_2483_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2484_cast_fp16 = mul(x = var_2482_cast_fp16, y = var_2483_to_fp16)[name = tensor("op_2484_cast_fp16")]; + tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2232_cast_fp16, var_2102_cast_fp16))[name = tensor("op_2486_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = tensor("op_2487_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2488_cast_fp16 = mul(x = var_2486_cast_fp16, y = var_2487_to_fp16)[name = tensor("op_2488_cast_fp16")]; + tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2236_cast_fp16, var_2106_cast_fp16))[name = tensor("op_2490_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2492_cast_fp16 = mul(x = var_2490_cast_fp16, y = var_2491_to_fp16)[name = tensor("op_2492_cast_fp16")]; + tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2240_cast_fp16, var_2110_cast_fp16))[name = tensor("op_2494_cast_fp16")]; + tensor var_2495_to_fp16 = const()[name = tensor("op_2495_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2496_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2495_to_fp16)[name = tensor("op_2496_cast_fp16")]; + tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2244_cast_fp16, var_2114_cast_fp16))[name = tensor("op_2498_cast_fp16")]; + tensor var_2499_to_fp16 = const()[name = tensor("op_2499_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2500_cast_fp16 = mul(x = var_2498_cast_fp16, y = var_2499_to_fp16)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2248_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2502_cast_fp16")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2504_cast_fp16 = mul(x = var_2502_cast_fp16, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; + tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2252_cast_fp16, var_2122_cast_fp16))[name = tensor("op_2506_cast_fp16")]; + tensor var_2507_to_fp16 = const()[name = tensor("op_2507_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2508_cast_fp16 = mul(x = var_2506_cast_fp16, y = var_2507_to_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2256_cast_fp16, var_2126_cast_fp16))[name = tensor("op_2510_cast_fp16")]; + tensor var_2511_to_fp16 = const()[name = tensor("op_2511_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2512_cast_fp16 = mul(x = var_2510_cast_fp16, y = var_2511_to_fp16)[name = tensor("op_2512_cast_fp16")]; + tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2260_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2514_cast_fp16")]; + tensor var_2515_to_fp16 = const()[name = tensor("op_2515_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2516_cast_fp16 = mul(x = var_2514_cast_fp16, y = var_2515_to_fp16)[name = tensor("op_2516_cast_fp16")]; + tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2264_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2518_cast_fp16")]; + tensor var_2519_to_fp16 = const()[name = tensor("op_2519_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2520_cast_fp16 = mul(x = var_2518_cast_fp16, y = var_2519_to_fp16)[name = tensor("op_2520_cast_fp16")]; + tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2268_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2522_cast_fp16")]; + tensor var_2523_to_fp16 = const()[name = tensor("op_2523_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2524_cast_fp16 = mul(x = var_2522_cast_fp16, y = var_2523_to_fp16)[name = tensor("op_2524_cast_fp16")]; + tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2272_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2526_cast_fp16")]; + tensor var_2527_to_fp16 = const()[name = tensor("op_2527_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2528_cast_fp16 = mul(x = var_2526_cast_fp16, y = var_2527_to_fp16)[name = tensor("op_2528_cast_fp16")]; + tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2276_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2530_cast_fp16")]; + tensor var_2531_to_fp16 = const()[name = tensor("op_2531_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2532_cast_fp16 = mul(x = var_2530_cast_fp16, y = var_2531_to_fp16)[name = tensor("op_2532_cast_fp16")]; + tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2280_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2534_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = tensor("op_2535_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2536_cast_fp16 = mul(x = var_2534_cast_fp16, y = var_2535_to_fp16)[name = tensor("op_2536_cast_fp16")]; + tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2284_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2538_cast_fp16")]; + tensor var_2539_to_fp16 = const()[name = tensor("op_2539_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2540_cast_fp16 = mul(x = var_2538_cast_fp16, y = var_2539_to_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2288_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2542_cast_fp16")]; + tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_2544_cast_fp16 = mul(x = var_2542_cast_fp16, y = var_2543_to_fp16)[name = tensor("op_2544_cast_fp16")]; + tensor aw_129_cast_fp16 = add(x = var_2420_cast_fp16, y = mask)[name = tensor("aw_129_cast_fp16")]; + tensor aw_131_cast_fp16 = add(x = var_2424_cast_fp16, y = mask)[name = tensor("aw_131_cast_fp16")]; + tensor aw_133_cast_fp16 = add(x = var_2428_cast_fp16, y = mask)[name = tensor("aw_133_cast_fp16")]; + tensor aw_135_cast_fp16 = add(x = var_2432_cast_fp16, y = mask)[name = tensor("aw_135_cast_fp16")]; + tensor aw_137_cast_fp16 = add(x = var_2436_cast_fp16, y = mask)[name = tensor("aw_137_cast_fp16")]; + tensor aw_139_cast_fp16 = add(x = var_2440_cast_fp16, y = mask)[name = tensor("aw_139_cast_fp16")]; + tensor aw_141_cast_fp16 = add(x = var_2444_cast_fp16, y = mask)[name = tensor("aw_141_cast_fp16")]; + tensor aw_143_cast_fp16 = add(x = var_2448_cast_fp16, y = mask)[name = tensor("aw_143_cast_fp16")]; + tensor aw_145_cast_fp16 = add(x = var_2452_cast_fp16, y = mask)[name = tensor("aw_145_cast_fp16")]; + tensor aw_147_cast_fp16 = add(x = var_2456_cast_fp16, y = mask)[name = tensor("aw_147_cast_fp16")]; + tensor aw_149_cast_fp16 = add(x = var_2460_cast_fp16, y = mask)[name = tensor("aw_149_cast_fp16")]; + tensor aw_151_cast_fp16 = add(x = var_2464_cast_fp16, y = mask)[name = tensor("aw_151_cast_fp16")]; + tensor aw_153_cast_fp16 = add(x = var_2468_cast_fp16, y = mask)[name = tensor("aw_153_cast_fp16")]; + tensor aw_155_cast_fp16 = add(x = var_2472_cast_fp16, y = mask)[name = tensor("aw_155_cast_fp16")]; + tensor aw_157_cast_fp16 = add(x = var_2476_cast_fp16, y = mask)[name = tensor("aw_157_cast_fp16")]; + tensor aw_159_cast_fp16 = add(x = var_2480_cast_fp16, y = mask)[name = tensor("aw_159_cast_fp16")]; + tensor aw_161_cast_fp16 = add(x = var_2484_cast_fp16, y = mask)[name = tensor("aw_161_cast_fp16")]; + tensor aw_163_cast_fp16 = add(x = var_2488_cast_fp16, y = mask)[name = tensor("aw_163_cast_fp16")]; + tensor aw_165_cast_fp16 = add(x = var_2492_cast_fp16, y = mask)[name = tensor("aw_165_cast_fp16")]; + tensor aw_167_cast_fp16 = add(x = var_2496_cast_fp16, y = mask)[name = tensor("aw_167_cast_fp16")]; + tensor aw_169_cast_fp16 = add(x = var_2500_cast_fp16, y = mask)[name = tensor("aw_169_cast_fp16")]; + tensor aw_171_cast_fp16 = add(x = var_2504_cast_fp16, y = mask)[name = tensor("aw_171_cast_fp16")]; + tensor aw_173_cast_fp16 = add(x = var_2508_cast_fp16, y = mask)[name = tensor("aw_173_cast_fp16")]; + tensor aw_175_cast_fp16 = add(x = var_2512_cast_fp16, y = mask)[name = tensor("aw_175_cast_fp16")]; + tensor aw_177_cast_fp16 = add(x = var_2516_cast_fp16, y = mask)[name = tensor("aw_177_cast_fp16")]; + tensor aw_179_cast_fp16 = add(x = var_2520_cast_fp16, y = mask)[name = tensor("aw_179_cast_fp16")]; + tensor aw_181_cast_fp16 = add(x = var_2524_cast_fp16, y = mask)[name = tensor("aw_181_cast_fp16")]; + tensor aw_183_cast_fp16 = add(x = var_2528_cast_fp16, y = mask)[name = tensor("aw_183_cast_fp16")]; + tensor aw_185_cast_fp16 = add(x = var_2532_cast_fp16, y = mask)[name = tensor("aw_185_cast_fp16")]; + tensor aw_187_cast_fp16 = add(x = var_2536_cast_fp16, y = mask)[name = tensor("aw_187_cast_fp16")]; + tensor aw_189_cast_fp16 = add(x = var_2540_cast_fp16, y = mask)[name = tensor("aw_189_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_2544_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_1886, x = aw_129_cast_fp16)[name = tensor("op_2577_cast_fp16")]; + tensor var_2578_cast_fp16 = softmax(axis = var_1886, x = aw_131_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2579_cast_fp16 = softmax(axis = var_1886, x = aw_133_cast_fp16)[name = tensor("op_2579_cast_fp16")]; + tensor var_2580_cast_fp16 = softmax(axis = var_1886, x = aw_135_cast_fp16)[name = tensor("op_2580_cast_fp16")]; + tensor var_2581_cast_fp16 = softmax(axis = var_1886, x = aw_137_cast_fp16)[name = tensor("op_2581_cast_fp16")]; + tensor var_2582_cast_fp16 = softmax(axis = var_1886, x = aw_139_cast_fp16)[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_cast_fp16 = softmax(axis = var_1886, x = aw_141_cast_fp16)[name = tensor("op_2583_cast_fp16")]; + tensor var_2584_cast_fp16 = softmax(axis = var_1886, x = aw_143_cast_fp16)[name = tensor("op_2584_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_1886, x = aw_145_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586_cast_fp16 = softmax(axis = var_1886, x = aw_147_cast_fp16)[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_cast_fp16 = softmax(axis = var_1886, x = aw_149_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor var_2588_cast_fp16 = softmax(axis = var_1886, x = aw_151_cast_fp16)[name = tensor("op_2588_cast_fp16")]; + tensor var_2589_cast_fp16 = softmax(axis = var_1886, x = aw_153_cast_fp16)[name = tensor("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = softmax(axis = var_1886, x = aw_155_cast_fp16)[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_cast_fp16 = softmax(axis = var_1886, x = aw_157_cast_fp16)[name = tensor("op_2591_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_1886, x = aw_159_cast_fp16)[name = tensor("op_2592_cast_fp16")]; + tensor var_2593_cast_fp16 = softmax(axis = var_1886, x = aw_161_cast_fp16)[name = tensor("op_2593_cast_fp16")]; + tensor var_2594_cast_fp16 = softmax(axis = var_1886, x = aw_163_cast_fp16)[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_cast_fp16 = softmax(axis = var_1886, x = aw_165_cast_fp16)[name = tensor("op_2595_cast_fp16")]; + tensor var_2596_cast_fp16 = softmax(axis = var_1886, x = aw_167_cast_fp16)[name = tensor("op_2596_cast_fp16")]; + tensor var_2597_cast_fp16 = softmax(axis = var_1886, x = aw_169_cast_fp16)[name = tensor("op_2597_cast_fp16")]; + tensor var_2598_cast_fp16 = softmax(axis = var_1886, x = aw_171_cast_fp16)[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_cast_fp16 = softmax(axis = var_1886, x = aw_173_cast_fp16)[name = tensor("op_2599_cast_fp16")]; + tensor var_2600_cast_fp16 = softmax(axis = var_1886, x = aw_175_cast_fp16)[name = tensor("op_2600_cast_fp16")]; + tensor var_2601_cast_fp16 = softmax(axis = var_1886, x = aw_177_cast_fp16)[name = tensor("op_2601_cast_fp16")]; + tensor var_2602_cast_fp16 = softmax(axis = var_1886, x = aw_179_cast_fp16)[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_cast_fp16 = softmax(axis = var_1886, x = aw_181_cast_fp16)[name = tensor("op_2603_cast_fp16")]; + tensor var_2604_cast_fp16 = softmax(axis = var_1886, x = aw_183_cast_fp16)[name = tensor("op_2604_cast_fp16")]; + tensor var_2605_cast_fp16 = softmax(axis = var_1886, x = aw_185_cast_fp16)[name = tensor("op_2605_cast_fp16")]; + tensor var_2606_cast_fp16 = softmax(axis = var_1886, x = aw_187_cast_fp16)[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_cast_fp16 = softmax(axis = var_1886, x = aw_189_cast_fp16)[name = tensor("op_2607_cast_fp16")]; + tensor var_2608_cast_fp16 = softmax(axis = var_1886, x = aw_cast_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2290_cast_fp16, var_2577_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2294_cast_fp16, var_2578_cast_fp16))[name = tensor("op_2612_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2298_cast_fp16, var_2579_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2302_cast_fp16, var_2580_cast_fp16))[name = tensor("op_2616_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2306_cast_fp16, var_2581_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2310_cast_fp16, var_2582_cast_fp16))[name = tensor("op_2620_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2314_cast_fp16, var_2583_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2318_cast_fp16, var_2584_cast_fp16))[name = tensor("op_2624_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2322_cast_fp16, var_2585_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2326_cast_fp16, var_2586_cast_fp16))[name = tensor("op_2628_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2330_cast_fp16, var_2587_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2334_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2632_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2338_cast_fp16, var_2589_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2342_cast_fp16, var_2590_cast_fp16))[name = tensor("op_2636_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2346_cast_fp16, var_2591_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2350_cast_fp16, var_2592_cast_fp16))[name = tensor("op_2640_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2354_cast_fp16, var_2593_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2358_cast_fp16, var_2594_cast_fp16))[name = tensor("op_2644_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2362_cast_fp16, var_2595_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2366_cast_fp16, var_2596_cast_fp16))[name = tensor("op_2648_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2370_cast_fp16, var_2597_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2652_cast_fp16 = einsum(equation = var_2652_equation_0, values = (var_2374_cast_fp16, var_2598_cast_fp16))[name = tensor("op_2652_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2378_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2656_cast_fp16 = einsum(equation = var_2656_equation_0, values = (var_2382_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2656_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2386_cast_fp16, var_2601_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2660_cast_fp16 = einsum(equation = var_2660_equation_0, values = (var_2390_cast_fp16, var_2602_cast_fp16))[name = tensor("op_2660_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2394_cast_fp16, var_2603_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2664_cast_fp16 = einsum(equation = var_2664_equation_0, values = (var_2398_cast_fp16, var_2604_cast_fp16))[name = tensor("op_2664_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2402_cast_fp16, var_2605_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2668_cast_fp16 = einsum(equation = var_2668_equation_0, values = (var_2406_cast_fp16, var_2606_cast_fp16))[name = tensor("op_2668_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2410_cast_fp16, var_2607_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2672_cast_fp16 = einsum(equation = var_2672_equation_0, values = (var_2414_cast_fp16, var_2608_cast_fp16))[name = tensor("op_2672_cast_fp16")]; + tensor x_43_interleave_0 = const()[name = tensor("x_43_interleave_0"), val = tensor(false)]; + tensor x_43_cast_fp16 = concat(axis = var_1886, interleave = x_43_interleave_0, values = (var_2610_cast_fp16, var_2612_cast_fp16, var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16, var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16, var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16, var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("x_43_cast_fp16")]; + tensor var_2677 = const()[name = tensor("op_2677"), val = tensor([1, 4096, -1, 8])]; + tensor input_23_cast_fp16 = reshape(shape = var_2677, x = x_43_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683 = const()[name = tensor("op_2683"), val = tensor([1, 1])]; + tensor var_2685_pad_type_0 = const()[name = tensor("op_2685_pad_type_0"), val = tensor("custom")]; + tensor var_2685_pad_0 = const()[name = tensor("op_2685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2685_cast_fp16 = conv(dilations = var_2683, groups = var_1886, pad = var_2685_pad_0, pad_type = var_2685_pad_type_0, strides = var_2681, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_23_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303804736)))]; + tensor attention_output_cast_fp16 = mul(x = var_2685_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = attention_output_cast_fp16, y = x_33_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812992)))]; + tensor x_eps_cast_fp16 = concat(axis = var_1886, interleave = x_eps_interleave_0, values = (x_45_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_1889, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_31_cast_fp16 = real_div(x = x_45_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_31_cast_fp16")]; + tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p+6)]; + tensor x_normed_33_cast_fp16 = mul(x = x_normed_31_cast_fp16, y = var_2710_to_fp16)[name = tensor("x_normed_33_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303813184)))]; + tensor input_25_cast_fp16 = mul(x = x_normed_33_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_2722 = const()[name = tensor("op_2722"), val = tensor([1, 1])]; + tensor var_2724 = const()[name = tensor("op_2724"), val = tensor([1, 1])]; + tensor var_2726_pad_type_0 = const()[name = tensor("op_2726_pad_type_0"), val = tensor("custom")]; + tensor var_2726_pad_0 = const()[name = tensor("op_2726_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2726_cast_fp16 = conv(dilations = var_2724, groups = var_1886, pad = var_2726_pad_0, pad_type = var_2726_pad_type_0, strides = var_2722, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2726_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303821440)))]; + tensor input_27_cast_fp16 = mul(x = var_2726_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 1])]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734_pad_type_0 = const()[name = tensor("op_2734_pad_type_0"), val = tensor("custom")]; + tensor var_2734_pad_0 = const()[name = tensor("op_2734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2734_cast_fp16 = conv(dilations = var_2732, groups = var_1886, pad = var_2734_pad_0, pad_type = var_2734_pad_type_0, strides = var_2730, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_25_cast_fp16)[name = tensor("op_2734_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303843520)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_2734_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_2736_cast_fp16 = silu(x = input_27_cast_fp16)[name = tensor("op_2736_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_2736_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor var_2742 = const()[name = tensor("op_2742"), val = tensor([1, 1])]; + tensor var_2744_pad_type_0 = const()[name = tensor("op_2744_pad_type_0"), val = tensor("custom")]; + tensor var_2744_pad_0 = const()[name = tensor("op_2744_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2744_cast_fp16 = conv(dilations = var_2742, groups = var_1886, pad = var_2744_pad_0, pad_type = var_2744_pad_type_0, strides = var_2740, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_2744_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303865600)))]; + tensor var_2745_cast_fp16 = mul(x = var_2744_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_2745_cast_fp16")]; + tensor new_x = add(x = var_2745_cast_fp16, y = x_45_cast_fp16)[name = tensor("op_2746_cast_fp16")]; } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); } \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk9.mlmodelc/weights/weight.bin index 271764a94ff3da74677061db9896a4da55baeeca..b9cfd677e3c71d2a6481754f0f7950c5b51cc866 100644 --- a/Llama-2-7b-hf_chunk9.mlmodelc/weights/weight.bin +++ b/Llama-2-7b-hf_chunk9.mlmodelc/weights/weight.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a1aa227b7ef525860924ae4fb4768a8514bfc00a4226c543aba73f22f68c656 -size 303872704 +oid sha256:970ec542d6b1148bfcd434d652a9e2a9d37168e07b86db0011e0a18279e2c419 +size 303873856 diff --git a/generation-cache-processor.mlmodelc/analytics/coremldata.bin b/generation-cache-processor.mlmodelc/analytics/coremldata.bin index d37d971fd6791ba6ac3c0001f54eb9a8cf6af39a..ddccd9c282085e6cca125b52bc218b6f0b642e55 100644 --- a/generation-cache-processor.mlmodelc/analytics/coremldata.bin +++ b/generation-cache-processor.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c961b8e84308365368c6798dfa3395fd75fc5caf4efe68d2362b4cc93bb7602 +oid sha256:134f98ea8a952f820760190a5d9e21c077dfcccd56b6560c759162a2b066cb69 size 243 diff --git a/generation-cache-processor.mlmodelc/coremldata.bin b/generation-cache-processor.mlmodelc/coremldata.bin index 934d5fe36b997a2711ad9e52cec88f23f97a82e7..2c518bcf8d81c335701b272e71aa906fffdeb468 100644 --- a/generation-cache-processor.mlmodelc/coremldata.bin +++ b/generation-cache-processor.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2847ce6c3125df3cdc89c7e7762d1bd4e40ed33c31e8b58c49bffbd795e10ec -size 520 +oid sha256:5a83ba77bf62d7c8dd403062caf0dcfe5b937da123e0d69afb06968cf6301b69 +size 522 diff --git a/generation-cache-processor.mlmodelc/metadata.json b/generation-cache-processor.mlmodelc/metadata.json index e418bcb5abba3fccc3160e04c17a8bf7cbae94da..2f610c7a6711aebe3a72730757f511d8381bed34 100644 --- a/generation-cache-processor.mlmodelc/metadata.json +++ b/generation-cache-processor.mlmodelc/metadata.json @@ -6,9 +6,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "generation_k_cache", "type" : "MultiArray" }, @@ -16,9 +16,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "generation_v_cache", "type" : "MultiArray" }, @@ -58,17 +58,17 @@ }, "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", - "com.github.apple.coremltools.source" : "torch==2.3.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 448, 1, 4096]", "name" : "old_k_cache", "type" : "MultiArray" }, @@ -76,9 +76,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 4096)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 64, 1, 4096]", "name" : "new_k_cache", "type" : "MultiArray" }, @@ -86,9 +86,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 448)", "shortDescription" : "", - "shape" : "[1, 32, 128, 448]", + "shape" : "[1, 4096, 1, 448]", "name" : "old_v_cache", "type" : "MultiArray" }, @@ -96,9 +96,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", "shortDescription" : "", - "shape" : "[1, 32, 128, 64]", + "shape" : "[1, 4096, 1, 64]", "name" : "new_v_cache", "type" : "MultiArray" } diff --git a/generation-cache-processor.mlmodelc/model.mil b/generation-cache-processor.mlmodelc/model.mil index de506f6cacb07dee288aa8cd31c61cd28f439dde..20a55b9d5d0c357ba431ac8298d68bdead46da60 100644 --- a/generation-cache-processor.mlmodelc/model.mil +++ b/generation-cache-processor.mlmodelc/model.mil @@ -1,23 +1,23 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.3.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor new_k_cache, tensor new_v_cache, tensor old_k_cache, tensor old_v_cache) { - tensor var_6 = const()[name = tensor("op_6"), val = tensor(-1)]; + func main(tensor new_k_cache, tensor new_v_cache, tensor old_k_cache, tensor old_v_cache) { + tensor var_6 = const()[name = tensor("op_6"), val = tensor(-3)]; tensor cat_k_1_interleave_0 = const()[name = tensor("cat_k_1_interleave_0"), val = tensor(false)]; - tensor cat_k_1_cast_fp16 = concat(axis = var_6, interleave = cat_k_1_interleave_0, values = (old_k_cache, new_k_cache))[name = tensor("cat_k_1_cast_fp16")]; + tensor cat_k_1_cast_fp16 = concat(axis = var_6, interleave = cat_k_1_interleave_0, values = (old_k_cache, new_k_cache))[name = tensor("cat_k_1_cast_fp16")]; tensor var_9 = const()[name = tensor("op_9"), val = tensor(-1)]; tensor cat_v_interleave_0 = const()[name = tensor("cat_v_interleave_0"), val = tensor(false)]; - tensor cat_v_cast_fp16 = concat(axis = var_9, interleave = cat_v_interleave_0, values = (old_v_cache, new_v_cache))[name = tensor("cat_v_cast_fp16")]; - tensor cat_k_begin_0 = const()[name = tensor("cat_k_begin_0"), val = tensor([0, 0, 0, 1])]; - tensor cat_k_end_0 = const()[name = tensor("cat_k_end_0"), val = tensor([1, 32, 128, 449])]; - tensor cat_k_end_mask_0 = const()[name = tensor("cat_k_end_mask_0"), val = tensor([true, true, true, false])]; - tensor generation_k_cache = slice_by_index(begin = cat_k_begin_0, end = cat_k_end_0, end_mask = cat_k_end_mask_0, x = cat_k_1_cast_fp16)[name = tensor("cat_k_cast_fp16")]; + tensor cat_v_cast_fp16 = concat(axis = var_9, interleave = cat_v_interleave_0, values = (old_v_cache, new_v_cache))[name = tensor("cat_v_cast_fp16")]; + tensor var_20_begin_0 = const()[name = tensor("op_20_begin_0"), val = tensor([0, 1, 0, 0])]; + tensor var_20_end_0 = const()[name = tensor("op_20_end_0"), val = tensor([1, 449, 1, 4096])]; + tensor var_20_end_mask_0 = const()[name = tensor("op_20_end_mask_0"), val = tensor([true, false, true, true])]; + tensor generation_k_cache = slice_by_index(begin = var_20_begin_0, end = var_20_end_0, end_mask = var_20_end_mask_0, x = cat_k_1_cast_fp16)[name = tensor("op_20_cast_fp16")]; tensor var_50_begin_0 = const()[name = tensor("op_50_begin_0"), val = tensor([0, 0, 0, 1])]; - tensor var_50_end_0 = const()[name = tensor("op_50_end_0"), val = tensor([1, 32, 128, 449])]; + tensor var_50_end_0 = const()[name = tensor("op_50_end_0"), val = tensor([1, 4096, 1, 449])]; tensor var_50_end_mask_0 = const()[name = tensor("op_50_end_mask_0"), val = tensor([true, true, true, false])]; - tensor generation_v_cache = slice_by_index(begin = var_50_begin_0, end = var_50_end_0, end_mask = var_50_end_mask_0, x = cat_v_cast_fp16)[name = tensor("op_50_cast_fp16")]; + tensor generation_v_cache = slice_by_index(begin = var_50_begin_0, end = var_50_end_0, end_mask = var_50_end_mask_0, x = cat_v_cast_fp16)[name = tensor("op_50_cast_fp16")]; tensor var_51_promoted_to_fp16 = const()[name = tensor("op_51_promoted_to_fp16"), val = tensor(0x1p+1)]; - tensor prod_cast_fp16 = mul(x = generation_k_cache, y = var_51_promoted_to_fp16)[name = tensor("prod_cast_fp16")]; + tensor prod_cast_fp16 = mul(x = generation_k_cache, y = var_51_promoted_to_fp16)[name = tensor("prod_cast_fp16")]; tensor var_53_keep_dims_0 = const()[name = tensor("op_53_keep_dims_0"), val = tensor(false)]; tensor ignore_me_im_only_here_so_this_runs_on_the_ane = reduce_min(keep_dims = var_53_keep_dims_0, x = prod_cast_fp16)[name = tensor("op_53_cast_fp16")]; } -> (generation_k_cache, generation_v_cache, ignore_me_im_only_here_so_this_runs_on_the_ane); diff --git a/logit-processor.mlmodelc/analytics/coremldata.bin b/logit-processor.mlmodelc/analytics/coremldata.bin index d407de484c78aebeccfa508c0d0d3d77438f2de8..ac67b25bf8f1252645fd24f3b5a86e2cd35dc28d 100644 --- a/logit-processor.mlmodelc/analytics/coremldata.bin +++ b/logit-processor.mlmodelc/analytics/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13535ae56a04b77d84a922d0ff10bc425569df59eca00e4d93fcdc2c995c8c34 +oid sha256:72d9433176e6a80c761281219743bd081fc1f935801c88c62bfff49d064c7d7c size 243 diff --git a/logit-processor.mlmodelc/coremldata.bin b/logit-processor.mlmodelc/coremldata.bin index f0cf4b0b8ee474c2b0a9b82c8bf7319a7edea5e7..ab0dd010cc705b77733d5756e63af32ba44fc36f 100644 --- a/logit-processor.mlmodelc/coremldata.bin +++ b/logit-processor.mlmodelc/coremldata.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d87990cc182dc5d1a7fbc0b6161cd300a7b8c59b6488ffd941c21f86c7db6ea -size 311 +oid sha256:d09c9955603f8eb5f281da2d320ada09679d0da906b917d7c81652fa783ed19c +size 369 diff --git a/logit-processor.mlmodelc/metadata.json b/logit-processor.mlmodelc/metadata.json index ca20ac850d0091b1ce07cc347c2a8e7988cb474b..0301f749634d334e5577b762977e4be8aba34997 100644 --- a/logit-processor.mlmodelc/metadata.json +++ b/logit-processor.mlmodelc/metadata.json @@ -6,9 +6,9 @@ "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Int32", - "formattedType" : "MultiArray (Int32 1 × 64)", + "formattedType" : "MultiArray (Int32)", "shortDescription" : "", - "shape" : "[1, 64]", + "shape" : "[]", "name" : "argmax", "type" : "MultiArray" } @@ -36,18 +36,20 @@ "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.2" + "com.github.apple.coremltools.version" : "8.0b1" }, "inputSchema" : [ { - "hasShapeFlexibility" : "0", - "isOptional" : "0", - "dataType" : "Float16", - "formattedType" : "MultiArray (Float16 1 × 64 × 32000)", "shortDescription" : "", - "shape" : "[1, 64, 32000]", + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 511 × 32000 | 1 × 1 × 32000 | 1 × 2 × 32000 | 1 × 4 × 32000 | 1 × 64 × 32000 | 1 × 512 × 32000", + "formattedType" : "MultiArray (Float16 1 × 511 × 32000)", + "type" : "MultiArray", + "shape" : "[1, 511, 32000]", "name" : "logits", - "type" : "MultiArray" + "enumeratedShapes" : "[[1, 511, 32000], [1, 1, 32000], [1, 2, 32000], [1, 4, 32000], [1, 64, 32000], [1, 512, 32000]]" } ], "generatedClassName" : "logit_processor", diff --git a/logit-processor.mlmodelc/model.mil b/logit-processor.mlmodelc/model.mil index d49b081fc3052d7e05171ccc76ac7ee373b1e2e8..1d0a9a01e104a44fdb81bca42c926ce8fc63aac8 100644 --- a/logit-processor.mlmodelc/model.mil +++ b/logit-processor.mlmodelc/model.mil @@ -1,9 +1,9 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] { - func main(tensor logits) { + func main(tensor logits) [FlexibleShapeInformation = tuple, dict, tensor>>, tuple, dict, dict, tensor>>>>((("DefaultShapes", {{"logits", [1, 511, 32000]}}), ("EnumeratedShapes", {{"logits_1_1_1_1_32000_", {{"logits", [1, 1, 32000]}}}, {"logits_1_1_1_2_32000_", {{"logits", [1, 2, 32000]}}}, {"logits_1_1_1_4_32000_", {{"logits", [1, 4, 32000]}}}, {"logits_1_1_1_511_32000_", {{"logits", [1, 511, 32000]}}}, {"logits_1_1_1_512_32000_", {{"logits", [1, 512, 32000]}}}, {"logits_1_1_1_64_32000_", {{"logits", [1, 64, 32000]}}}})))] { tensor var_2 = const()[name = tensor("op_2"), val = tensor(-1)]; tensor var_3 = const()[name = tensor("op_3"), val = tensor(false)]; - tensor argmax = reduce_argmax(axis = var_2, keep_dims = var_3, x = logits)[name = tensor("op_4_cast_fp16")]; + tensor argmax = reduce_argmax(axis = var_2, keep_dims = var_3, x = logits)[name = tensor("op_4_cast_fp16")]; } -> (argmax); } \ No newline at end of file