diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..795642f5d9e77353f486f59d448752d2e2b316a2 --- /dev/null +++ b/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2184893e48a9da76b01012a32cca3e2ebfd4080553daa78318fe2391679dd7fe +size 243 diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..5cd381ac5c32ecf671fe48f419b6af48cf1f229e --- /dev/null +++ b/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dbc016e9274c2a01d9eddb55dfd163a8ae74e7e97f0932268602c1a8b14903c +size 407 diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6913dc8ae5dea890292ef4531ac575b2245175b9 --- /dev/null +++ b/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json @@ -0,0 +1,104 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Select" : 2, + "Tile" : 2, + "Ios16.sub" : 3, + "Transpose" : 1, + "Ios16.gather" : 3, + "ExpandDims" : 4, + "Ios16.maximum" : 1, + "Ios16.less" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 64]", + "name" : "input_ids", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "full_sequence_length", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk1", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/model.mil b/Llama-2-7b-hf_chunk1.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..4613d6c3ac483995c10803555b436a36a44176ae --- /dev/null +++ b/Llama-2-7b-hf_chunk1.mlmodelc/model.mil @@ -0,0 +1,48 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor full_sequence_length, tensor input_ids) { + tensor T = const()[name = tensor("T"), val = tensor([64])]; + tensor x_axis_0 = const()[name = tensor("x_axis_0"), val = tensor(0)]; + tensor x_batch_dims_0 = const()[name = tensor("x_batch_dims_0"), val = tensor(0)]; + tensor wte_weight_to_fp16 = const()[name = tensor("wte_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_cast_fp16 = gather(axis = x_axis_0, batch_dims = x_batch_dims_0, indices = input_ids, x = wte_weight_to_fp16)[name = tensor("x_cast_fp16")]; + tensor var_16_perm_0 = const()[name = tensor("op_16_perm_0"), val = tensor([0, 2, 1])]; + tensor var_18_axes_0 = const()[name = tensor("op_18_axes_0"), val = tensor([2])]; + tensor transpose_0 = transpose(perm = var_16_perm_0, x = x_cast_fp16)[name = tensor("transpose_0")]; + tensor x = expand_dims(axes = var_18_axes_0, x = transpose_0)[name = tensor("op_18_cast_fp16")]; + tensor pos_offset = sub(x = T, y = full_sequence_length)[name = tensor("pos_offset")]; + tensor var_26 = const()[name = tensor("op_26"), val = tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63])]; + tensor input_pos_1 = sub(x = var_26, y = pos_offset)[name = tensor("input_pos_1")]; + tensor var_34 = const()[name = tensor("op_34"), val = tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]; + tensor input_pos = maximum(x = input_pos_1, y = var_34)[name = tensor("input_pos")]; + tensor var_45 = const()[name = tensor("op_45"), val = tensor(1)]; + tensor var_46_batch_dims_0 = const()[name = tensor("op_46_batch_dims_0"), val = tensor(0)]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262144128)))]; + tensor cos = gather(axis = var_45, batch_dims = var_46_batch_dims_0, indices = input_pos, x = var_44_to_fp16)[name = tensor("op_46_cast_fp16")]; + tensor var_56 = const()[name = tensor("op_56"), val = tensor(1)]; + tensor var_57_batch_dims_0 = const()[name = tensor("op_57_batch_dims_0"), val = tensor(0)]; + tensor var_55_to_fp16 = const()[name = tensor("op_55_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262275264)))]; + tensor sin = gather(axis = var_56, batch_dims = var_57_batch_dims_0, indices = input_pos, x = var_55_to_fp16)[name = tensor("op_57_cast_fp16")]; + tensor var_92 = const()[name = tensor("op_92"), val = tensor([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63]])]; + tensor var_95 = less(x = var_92, y = pos_offset)[name = tensor("op_95")]; + tensor var_95_after_broadcast_reps_0 = const()[name = tensor("op_95_after_broadcast_reps_0"), val = tensor([1, 512])]; + tensor var_95_after_broadcast = tile(reps = var_95_after_broadcast_reps_0, x = var_95)[name = tensor("op_95_after_broadcast")]; + tensor all_mask_to_fp16 = const()[name = tensor("all_mask_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262406400)))]; + tensor m_1_to_fp16 = const()[name = tensor("m_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262472000)))]; + tensor m_3_cast_fp16 = select(a = all_mask_to_fp16, b = m_1_to_fp16, cond = var_95_after_broadcast)[name = tensor("m_3_cast_fp16")]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511])]; + tensor var_106 = const()[name = tensor("op_106"), val = tensor(512)]; + tensor var_108 = sub(x = var_106, y = full_sequence_length)[name = tensor("op_108")]; + tensor var_109 = less(x = var_105, y = var_108)[name = tensor("op_109")]; + tensor expand_dims_0_axes_0 = const()[name = tensor("expand_dims_0_axes_0"), val = tensor([0])]; + tensor expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = var_109)[name = tensor("expand_dims_0")]; + tensor var_109_after_broadcast_reps_0 = const()[name = tensor("op_109_after_broadcast_reps_0"), val = tensor([64, 1])]; + tensor var_109_after_broadcast = tile(reps = var_109_after_broadcast_reps_0, x = expand_dims_0)[name = tensor("op_109_after_broadcast")]; + tensor m_cast_fp16 = select(a = all_mask_to_fp16, b = m_3_cast_fp16, cond = var_109_after_broadcast)[name = tensor("m_cast_fp16")]; + tensor var_112_axes_0 = const()[name = tensor("op_112_axes_0"), val = tensor([0])]; + tensor var_112_cast_fp16 = expand_dims(axes = var_112_axes_0, x = m_cast_fp16)[name = tensor("op_112_cast_fp16")]; + tensor var_114_axes_0 = const()[name = tensor("op_114_axes_0"), val = tensor([0])]; + tensor mask = expand_dims(axes = var_114_axes_0, x = var_112_cast_fp16)[name = tensor("op_114_cast_fp16")]; + } -> (x, cos, sin, mask); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fe6a2bd0bcbbf09db93eddc06361f9091faeef3 --- /dev/null +++ b/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a8ba0e4d6fc824f820051588b446e6b72dfb09497a058e443ab071d9b3cbc7 +size 262537600 diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a8e1fcd6e9aac86c476bdfef211aba9441a747c --- /dev/null +++ b/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79e263bb20b8a02d650dad2c3eee71ff787829f337aedacb6cd4e1b61c1ce23 +size 791 diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3c26103bafb4738c6fd5b51b37c02a73f2b4b983 --- /dev/null +++ b/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk10", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/model.mil b/Llama-2-7b-hf_chunk10.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk10.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk10.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk10.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef3438de9426f16ce0e72e62a2db5afd5a560998 --- /dev/null +++ b/Llama-2-7b-hf_chunk10.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d4446017950797cf7896941f17c78be0e7c925911e4555f70b1133d20f77b9 +size 303872704 diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347 --- /dev/null +++ b/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e +size 791 diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..13c34f866bf7312ea4a562467c8931a51d7d9932 --- /dev/null +++ b/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk11", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/model.mil b/Llama-2-7b-hf_chunk11.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk11.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk11.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk11.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..96a7b41de6393e0867ae5246888049739e50f0da --- /dev/null +++ b/Llama-2-7b-hf_chunk11.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9506f3438a1c857418b2dd28a4631b401f24e3bd606f0427c7adbf510af1e2dc +size 303872704 diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..94b21e541586d2c345445cad639367e7c6a0a244 --- /dev/null +++ b/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73e9cc1e9aaa1351af7ee9af6a10c0d8fd805fe2383635cee1714240351b5c2 +size 243 diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..93a5d37e577f7cb83d1e9df8e195e00cc041ac40 --- /dev/null +++ b/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e030d81f910b53587cf130f1dba0c1d731ab715ebd6ca0b4f475da21707b21e +size 651 diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..00a650ba3e3350963ebd9abfbfa8401cc8be692c --- /dev/null +++ b/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 12, + "Ios16.rsqrt" : 4, + "Ios16.mul" : 42, + "SliceByIndex" : 8, + "Ios16.constexprLutToDense" : 14, + "Ios16.conv" : 14, + "Ios16.add" : 14, + "Ios16.reduceMean" : 4, + "Ios16.matmul" : 4, + "Ios16.softmax" : 2, + "Ios16.reshape" : 8, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk12", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/model.mil b/Llama-2-7b-hf_chunk12.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..8de6335625c13a25c5109a209d6bcfe39538e22b --- /dev/null +++ b/Llama-2-7b-hf_chunk12.mlmodelc/model.mil @@ -0,0 +1,288 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_14 = const()[name = tensor("op_14"), val = tensor(3)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-2)]; + tensor var_21 = const()[name = tensor("op_21"), val = tensor(-1)]; + tensor var_28 = const()[name = tensor("op_28"), val = tensor(1)]; + tensor var_29 = const()[name = tensor("op_29"), val = tensor(true)]; + tensor var_37_cast_fp16 = mul(x = x, y = x)[name = tensor("op_37_cast_fp16")]; + tensor var_38 = const()[name = tensor("op_38"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_38, keep_dims = var_29, x = var_37_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_40_to_fp16 = const()[name = tensor("op_40_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_41_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_40_to_fp16)[name = tensor("op_41_cast_fp16")]; + tensor var_42_epsilon_0_to_fp16 = const()[name = tensor("op_42_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_42_cast_fp16 = rsqrt(epsilon = var_42_epsilon_0_to_fp16, x = var_41_cast_fp16)[name = tensor("op_42_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_42_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_54 = const()[name = tensor("op_54"), val = tensor([1, 1])]; + tensor var_56 = const()[name = tensor("op_56"), val = tensor([1, 1])]; + tensor var_58_pad_type_0 = const()[name = tensor("op_58_pad_type_0"), val = tensor("custom")]; + tensor var_58_pad_0 = const()[name = tensor("op_58_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_58_cast_fp16 = conv(dilations = var_56, groups = var_28, pad = var_58_pad_0, pad_type = var_58_pad_type_0, strides = var_54, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_58_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202386176)))]; + tensor q_1_cast_fp16 = mul(x = var_58_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_62 = const()[name = tensor("op_62"), val = tensor([1, 1])]; + tensor var_64 = const()[name = tensor("op_64"), val = tensor([1, 1])]; + tensor var_66_pad_type_0 = const()[name = tensor("op_66_pad_type_0"), val = tensor("custom")]; + tensor var_66_pad_0 = const()[name = tensor("op_66_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_66_cast_fp16 = conv(dilations = var_64, groups = var_28, pad = var_66_pad_0, pad_type = var_66_pad_type_0, strides = var_62, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_66_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202394432)))]; + tensor k_1_cast_fp16 = mul(x = var_66_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_70 = const()[name = tensor("op_70"), val = tensor([1, 1])]; + tensor var_72 = const()[name = tensor("op_72"), val = tensor([1, 1])]; + tensor var_74_pad_type_0 = const()[name = tensor("op_74_pad_type_0"), val = tensor("custom")]; + tensor var_74_pad_0 = const()[name = tensor("op_74_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_74_cast_fp16 = conv(dilations = var_72, groups = var_28, pad = var_74_pad_0, pad_type = var_74_pad_type_0, strides = var_70, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_74_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202402688)))]; + tensor v_1_cast_fp16 = mul(x = var_74_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_76, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_78 = const()[name = tensor("op_78"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_78, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_80, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_92_begin_0 = const()[name = tensor("op_92_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_92_end_0 = const()[name = tensor("op_92_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_92_end_mask_0 = const()[name = tensor("op_92_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_92_cast_fp16 = slice_by_index(begin = var_92_begin_0, end = var_92_end_0, end_mask = var_92_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_92_cast_fp16")]; + tensor var_98_begin_0 = const()[name = tensor("op_98_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_98_end_0 = const()[name = tensor("op_98_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_98_end_mask_0 = const()[name = tensor("op_98_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_98_cast_fp16 = slice_by_index(begin = var_98_begin_0, end = var_98_end_0, end_mask = var_98_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_98_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_100_cast_fp16 = mul(x = var_98_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_100_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_19, interleave = rotated_1_interleave_0, values = (var_100_cast_fp16, var_92_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_103_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_103_cast_fp16")]; + tensor var_104_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_104_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_103_cast_fp16, y = var_104_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_117_begin_0 = const()[name = tensor("op_117_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_117_end_0 = const()[name = tensor("op_117_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_117_end_mask_0 = const()[name = tensor("op_117_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_117_cast_fp16 = slice_by_index(begin = var_117_begin_0, end = var_117_end_0, end_mask = var_117_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_117_cast_fp16")]; + tensor var_123_begin_0 = const()[name = tensor("op_123_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_123_end_0 = const()[name = tensor("op_123_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_123_end_mask_0 = const()[name = tensor("op_123_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_123_cast_fp16 = slice_by_index(begin = var_123_begin_0, end = var_123_end_0, end_mask = var_123_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_123_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_125_cast_fp16 = mul(x = var_123_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_125_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_19, interleave = rotated_3_interleave_0, values = (var_125_cast_fp16, var_117_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_128_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_128_cast_fp16")]; + tensor var_129_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_129_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_128_cast_fp16, y = var_129_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_19, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_19, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_21, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_21, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_151_to_fp16 = const()[name = tensor("op_151_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_152_cast_fp16 = mul(x = q_5_cast_fp16, y = var_151_to_fp16)[name = tensor("op_152_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_152_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_160_cast_fp16 = softmax(axis = var_14, x = attn_weights_3_cast_fp16)[name = tensor("op_160_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_160_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_164 = const()[name = tensor("op_164"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_164, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 1])]; + tensor var_170 = const()[name = tensor("op_170"), val = tensor([1, 1])]; + tensor var_172_pad_type_0 = const()[name = tensor("op_172_pad_type_0"), val = tensor("custom")]; + tensor var_172_pad_0 = const()[name = tensor("op_172_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_172_cast_fp16 = conv(dilations = var_170, groups = var_28, pad = var_172_pad_0, pad_type = var_172_pad_type_0, strides = var_168, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_172_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202410944)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_172_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_181_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_181_cast_fp16")]; + tensor var_182 = const()[name = tensor("op_182"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_182, keep_dims = var_29, x = var_181_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_184_to_fp16 = const()[name = tensor("op_184_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_185_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_184_to_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186_epsilon_0_to_fp16 = const()[name = tensor("op_186_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_186_cast_fp16 = rsqrt(epsilon = var_186_epsilon_0_to_fp16, x = var_185_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_186_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202419200)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_198 = const()[name = tensor("op_198"), val = tensor([1, 1])]; + tensor var_200 = const()[name = tensor("op_200"), val = tensor([1, 1])]; + tensor var_202_pad_type_0 = const()[name = tensor("op_202_pad_type_0"), val = tensor("custom")]; + tensor var_202_pad_0 = const()[name = tensor("op_202_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_202_cast_fp16 = conv(dilations = var_200, groups = var_28, pad = var_202_pad_0, pad_type = var_202_pad_type_0, strides = var_198, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_202_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202427456)))]; + tensor input_5_cast_fp16 = mul(x = var_202_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_206 = const()[name = tensor("op_206"), val = tensor([1, 1])]; + tensor var_208 = const()[name = tensor("op_208"), val = tensor([1, 1])]; + tensor var_210_pad_type_0 = const()[name = tensor("op_210_pad_type_0"), val = tensor("custom")]; + tensor var_210_pad_0 = const()[name = tensor("op_210_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_210_cast_fp16 = conv(dilations = var_208, groups = var_28, pad = var_210_pad_0, pad_type = var_210_pad_type_0, strides = var_206, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_210_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202449536)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_210_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_212_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_212_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_212_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_216 = const()[name = tensor("op_216"), val = tensor([1, 1])]; + tensor var_218 = const()[name = tensor("op_218"), val = tensor([1, 1])]; + tensor var_220_pad_type_0 = const()[name = tensor("op_220_pad_type_0"), val = tensor("custom")]; + tensor var_220_pad_0 = const()[name = tensor("op_220_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_220_cast_fp16 = conv(dilations = var_218, groups = var_28, pad = var_220_pad_0, pad_type = var_220_pad_type_0, strides = var_216, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_220_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202471616)))]; + tensor var_221_cast_fp16 = mul(x = var_220_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_221_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_221_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_228 = const()[name = tensor("op_228"), val = tensor(3)]; + tensor var_233 = const()[name = tensor("op_233"), val = tensor(-2)]; + tensor var_235 = const()[name = tensor("op_235"), val = tensor(-1)]; + tensor var_242 = const()[name = tensor("op_242"), val = tensor(1)]; + tensor var_243 = const()[name = tensor("op_243"), val = tensor(true)]; + tensor var_250_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_250_cast_fp16")]; + tensor var_251 = const()[name = tensor("op_251"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_251, keep_dims = var_243, x = var_250_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_253_to_fp16 = const()[name = tensor("op_253_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_254_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_253_to_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255_epsilon_0_to_fp16 = const()[name = tensor("op_255_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_255_cast_fp16 = rsqrt(epsilon = var_255_epsilon_0_to_fp16, x = var_254_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_255_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202479872)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_270 = const()[name = tensor("op_270"), val = tensor([1, 1])]; + tensor var_272 = const()[name = tensor("op_272"), val = tensor([1, 1])]; + tensor var_274_pad_type_0 = const()[name = tensor("op_274_pad_type_0"), val = tensor("custom")]; + tensor var_274_pad_0 = const()[name = tensor("op_274_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_274_cast_fp16 = conv(dilations = var_272, groups = var_242, pad = var_274_pad_0, pad_type = var_274_pad_type_0, strides = var_270, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202488128)))]; + tensor q_7_cast_fp16 = mul(x = var_274_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_278 = const()[name = tensor("op_278"), val = tensor([1, 1])]; + tensor var_280 = const()[name = tensor("op_280"), val = tensor([1, 1])]; + tensor var_282_pad_type_0 = const()[name = tensor("op_282_pad_type_0"), val = tensor("custom")]; + tensor var_282_pad_0 = const()[name = tensor("op_282_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_282_cast_fp16 = conv(dilations = var_280, groups = var_242, pad = var_282_pad_0, pad_type = var_282_pad_type_0, strides = var_278, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202496384)))]; + tensor k_9_cast_fp16 = mul(x = var_282_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_286 = const()[name = tensor("op_286"), val = tensor([1, 1])]; + tensor var_288 = const()[name = tensor("op_288"), val = tensor([1, 1])]; + tensor var_290_pad_type_0 = const()[name = tensor("op_290_pad_type_0"), val = tensor("custom")]; + tensor var_290_pad_0 = const()[name = tensor("op_290_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_290_cast_fp16 = conv(dilations = var_288, groups = var_242, pad = var_290_pad_0, pad_type = var_290_pad_type_0, strides = var_286, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202504640)))]; + tensor v_7_cast_fp16 = mul(x = var_290_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_292, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_294 = const()[name = tensor("op_294"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_294, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_296, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_308_begin_0 = const()[name = tensor("op_308_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_308_end_0 = const()[name = tensor("op_308_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_308_end_mask_0 = const()[name = tensor("op_308_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_308_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_316_cast_fp16 = mul(x = var_314_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_316_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_233, interleave = rotated_5_interleave_0, values = (var_316_cast_fp16, var_308_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_319_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_319_cast_fp16")]; + tensor var_320_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_320_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_319_cast_fp16, y = var_320_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_333_begin_0 = const()[name = tensor("op_333_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_333_end_0 = const()[name = tensor("op_333_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_333_end_mask_0 = const()[name = tensor("op_333_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = var_333_end_0, end_mask = var_333_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_333_cast_fp16")]; + tensor var_339_begin_0 = const()[name = tensor("op_339_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_339_end_0 = const()[name = tensor("op_339_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_339_end_mask_0 = const()[name = tensor("op_339_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_339_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_341_cast_fp16 = mul(x = var_339_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_341_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_233, interleave = rotated_interleave_0, values = (var_341_cast_fp16, var_333_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_344_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_344_cast_fp16")]; + tensor var_345_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_345_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_344_cast_fp16, y = var_345_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_233, interleave = q_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_233, interleave = k_13_interleave_0, values = roped_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_235, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_235, interleave = v_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_cast_fp16")]; + tensor var_367_to_fp16 = const()[name = tensor("op_367_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_368_cast_fp16 = mul(x = q_cast_fp16, y = var_367_to_fp16)[name = tensor("op_368_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_368_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_376_cast_fp16 = softmax(axis = var_228, x = attn_weights_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_cast_fp16, y = var_376_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_380 = const()[name = tensor("op_380"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_380, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 1])]; + tensor var_386 = const()[name = tensor("op_386"), val = tensor([1, 1])]; + tensor var_388_pad_type_0 = const()[name = tensor("op_388_pad_type_0"), val = tensor("custom")]; + tensor var_388_pad_0 = const()[name = tensor("op_388_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_388_cast_fp16 = conv(dilations = var_386, groups = var_242, pad = var_388_pad_0, pad_type = var_388_pad_type_0, strides = var_384, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202512896)))]; + tensor attention_output_cast_fp16 = mul(x = var_388_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_397_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_397_cast_fp16")]; + tensor var_398 = const()[name = tensor("op_398"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_398, keep_dims = var_243, x = var_397_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_400_to_fp16 = const()[name = tensor("op_400_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_401_cast_fp16 = add(x = norm_x_cast_fp16, y = var_400_to_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402_epsilon_0_to_fp16 = const()[name = tensor("op_402_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_402_cast_fp16 = rsqrt(epsilon = var_402_epsilon_0_to_fp16, x = var_401_cast_fp16)[name = tensor("op_402_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_402_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202521152)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_414 = const()[name = tensor("op_414"), val = tensor([1, 1])]; + tensor var_416 = const()[name = tensor("op_416"), val = tensor([1, 1])]; + tensor var_418_pad_type_0 = const()[name = tensor("op_418_pad_type_0"), val = tensor("custom")]; + tensor var_418_pad_0 = const()[name = tensor("op_418_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_418_cast_fp16 = conv(dilations = var_416, groups = var_242, pad = var_418_pad_0, pad_type = var_418_pad_type_0, strides = var_414, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_418_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202529408)))]; + tensor input_13_cast_fp16 = mul(x = var_418_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_422 = const()[name = tensor("op_422"), val = tensor([1, 1])]; + tensor var_424 = const()[name = tensor("op_424"), val = tensor([1, 1])]; + tensor var_426_pad_type_0 = const()[name = tensor("op_426_pad_type_0"), val = tensor("custom")]; + tensor var_426_pad_0 = const()[name = tensor("op_426_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_426_cast_fp16 = conv(dilations = var_424, groups = var_242, pad = var_426_pad_0, pad_type = var_426_pad_type_0, strides = var_422, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_426_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202551488)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_426_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_428_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_428_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_428_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_432 = const()[name = tensor("op_432"), val = tensor([1, 1])]; + tensor var_434 = const()[name = tensor("op_434"), val = tensor([1, 1])]; + tensor var_436_pad_type_0 = const()[name = tensor("op_436_pad_type_0"), val = tensor("custom")]; + tensor var_436_pad_0 = const()[name = tensor("op_436_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_436_cast_fp16 = conv(dilations = var_434, groups = var_242, pad = var_436_pad_0, pad_type = var_436_pad_type_0, strides = var_432, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_436_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202573568)))]; + tensor var_437_cast_fp16 = mul(x = var_436_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_437_cast_fp16")]; + tensor new_x = add(x = var_437_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_438_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_v_cache_0, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk12.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk12.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..eccea461663c91bc20e74a994f2b883fcb80fcf2 --- /dev/null +++ b/Llama-2-7b-hf_chunk12.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d085d837454b4685bcd36331b09a5b0b329f7ef4da1f2dbed101b7ec075630 +size 202581824 diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk13.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2d908c082131d02333987a46e271e2ecaea4168 --- /dev/null +++ b/Llama-2-7b-hf_chunk13.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55bbf17f4d2567d045baa3ae69337cad81c45f822491151ed7a5b29327f874f6 +size 243 diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk13.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..3cb69fe22262b38b05d6e777c572388e81f07f9d --- /dev/null +++ b/Llama-2-7b-hf_chunk13.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d860ea43d6f8ebbf70594a29be6231ee1d324bdaf2f26417eb82297acb920e17 +size 309 diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk13.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..063ca7460a27e8e6592a5e49cf088fa80e79c39d --- /dev/null +++ b/Llama-2-7b-hf_chunk13.mlmodelc/metadata.json @@ -0,0 +1,65 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 32000)", + "shortDescription" : "", + "shape" : "[1, 64, 32000]", + "name" : "logits", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 1, + "Ios16.add" : 1, + "Ios16.mul" : 3, + "Ios16.rsqrt" : 1, + "Transpose" : 1, + "Ios16.reshape" : 3, + "Ios16.reduceMean" : 1, + "Ios16.matmul" : 2, + "Squeeze" : 1 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk13", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/model.mil b/Llama-2-7b-hf_chunk13.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..bc8aeb22f04ed716b1900a2605fb171673aef783 --- /dev/null +++ b/Llama-2-7b-hf_chunk13.mlmodelc/model.mil @@ -0,0 +1,38 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor x) { + tensor var_6 = const()[name = tensor("op_6"), val = tensor(true)]; + tensor var_13_cast_fp16 = mul(x = x, y = x)[name = tensor("op_13_cast_fp16")]; + tensor var_14 = const()[name = tensor("op_14"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_14, keep_dims = var_6, x = var_13_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_16_to_fp16 = const()[name = tensor("op_16_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_17_cast_fp16 = add(x = norm_x_cast_fp16, y = var_16_to_fp16)[name = tensor("op_17_cast_fp16")]; + tensor var_18_epsilon_0_to_fp16 = const()[name = tensor("op_18_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_18_cast_fp16 = rsqrt(epsilon = var_18_epsilon_0_to_fp16, x = var_17_cast_fp16)[name = tensor("op_18_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_18_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor ln_f_weight_to_fp16 = const()[name = tensor("ln_f_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = ln_f_weight_to_fp16)[name = tensor("x_cast_fp16")]; + tensor var_23_axes_0 = const()[name = tensor("op_23_axes_0"), val = tensor([2])]; + tensor var_23_cast_fp16 = squeeze(axes = var_23_axes_0, x = x_cast_fp16)[name = tensor("op_23_cast_fp16")]; + tensor var_26_perm_0 = const()[name = tensor("op_26_perm_0"), val = tensor([0, 2, 1])]; + tensor concat_4 = const()[name = tensor("concat_4"), val = tensor([64, 4096])]; + tensor transpose_4 = transpose(perm = var_26_perm_0, x = var_23_cast_fp16)[name = tensor("transpose_4")]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_4, x = transpose_4)[name = tensor("reshape_0_cast_fp16")]; + tensor matmul_0_transpose_x_0 = const()[name = tensor("matmul_0_transpose_x_0"), val = tensor(false)]; + tensor matmul_0_transpose_y_0 = const()[name = tensor("matmul_0_transpose_y_0"), val = tensor(false)]; + tensor transpose_1_to_fp16 = const()[name = tensor("transpose_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8320)))]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_1_to_fp16)[name = tensor("matmul_0_cast_fp16")]; + tensor concat_8 = const()[name = tensor("concat_8"), val = tensor([1, 64, 16384])]; + tensor reshape_2_cast_fp16 = reshape(shape = concat_8, x = matmul_0_cast_fp16)[name = tensor("reshape_2_cast_fp16")]; + tensor matmul_1_transpose_x_0 = const()[name = tensor("matmul_1_transpose_x_0"), val = tensor(false)]; + tensor matmul_1_transpose_y_0 = const()[name = tensor("matmul_1_transpose_y_0"), val = tensor(false)]; + tensor transpose_3_to_fp16 = const()[name = tensor("transpose_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134226112)))]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_3_to_fp16)[name = tensor("matmul_1_cast_fp16")]; + tensor concat_16 = const()[name = tensor("concat_16"), val = tensor([1, 64, 15616])]; + tensor reshape_5_cast_fp16 = reshape(shape = concat_16, x = matmul_1_cast_fp16)[name = tensor("reshape_5_cast_fp16")]; + tensor var_41 = const()[name = tensor("op_41"), val = tensor(-1)]; + tensor var_42_interleave_0 = const()[name = tensor("op_42_interleave_0"), val = tensor(false)]; + tensor logits = concat(axis = var_41, interleave = var_42_interleave_0, values = (reshape_2_cast_fp16, reshape_5_cast_fp16))[name = tensor("op_42_cast_fp16")]; + } -> (logits); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk13.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk13.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..b01cf7c867f985353c64ea8955aad628e988541d --- /dev/null +++ b/Llama-2-7b-hf_chunk13.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23cc0c8382a52638c94e9c9963873d35d3222e897233b39b03f4cc92deae2edb +size 262152448 diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347 --- /dev/null +++ b/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e +size 791 diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c5c8878f7ed07b36bc29057ec3f6367e40453195 --- /dev/null +++ b/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk2", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/model.mil b/Llama-2-7b-hf_chunk2.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk2.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk2.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk2.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..a217cc758bd417b95a3f1548247409bc3ba39c98 --- /dev/null +++ b/Llama-2-7b-hf_chunk2.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d25138904c91ffd7e03563365ae012b5b126a2b75fc66880152e092e7680e211 +size 303872704 diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347 --- /dev/null +++ b/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e +size 791 diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..15b9e234e921a5fda79679436c3b40e757f59b50 --- /dev/null +++ b/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk3", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/model.mil b/Llama-2-7b-hf_chunk3.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk3.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk3.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk3.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..dba7058109d5b8403f52bac7ed8290e93629dfea --- /dev/null +++ b/Llama-2-7b-hf_chunk3.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad1bc13ecfabbb4f02f8306bf18913019826fb28b002e14f11bddeca7a9edefa +size 303872704 diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a8e1fcd6e9aac86c476bdfef211aba9441a747c --- /dev/null +++ b/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79e263bb20b8a02d650dad2c3eee71ff787829f337aedacb6cd4e1b61c1ce23 +size 791 diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4df4a4e982e19c4eb6320f8c0dbc8e1d8389aa3c --- /dev/null +++ b/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk4", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/model.mil b/Llama-2-7b-hf_chunk4.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk4.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk4.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk4.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..e21eacda88a1266963fc0519fdb65ae62436f1ce --- /dev/null +++ b/Llama-2-7b-hf_chunk4.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b1969a0b2372ca72340108bf7967f643d02a423cac947a5bd3608fdde48b86 +size 303872704 diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347 --- /dev/null +++ b/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e +size 791 diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..82b7676890485cec49c89b0bfcbf270d57ced7bf --- /dev/null +++ b/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk5", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/model.mil b/Llama-2-7b-hf_chunk5.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk5.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk5.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk5.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..c71f381de60901f6e03693f18cec15ec5d645b9e --- /dev/null +++ b/Llama-2-7b-hf_chunk5.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d931534284a44e5004b85274be8d122ee55af90a599ea689a9491c6ce13fa16 +size 303872704 diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347 --- /dev/null +++ b/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e +size 791 diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..bbc232f400d84f860b0bbc9d074f8a021d011992 --- /dev/null +++ b/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk6", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/model.mil b/Llama-2-7b-hf_chunk6.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk6.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk6.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk6.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d9b250e04d53ce35dcb3567344f6896b14e8a2a --- /dev/null +++ b/Llama-2-7b-hf_chunk6.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646d17c5d6d62e055abb88615254cb2d8205cd46a7b98faa734136f30c8ca26a +size 303872704 diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347 --- /dev/null +++ b/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e +size 791 diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..eb0d6148a6e32ea64a3a9b331c6987331f31a9ce --- /dev/null +++ b/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk7", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/model.mil b/Llama-2-7b-hf_chunk7.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk7.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk7.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk7.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..ec66ac965de015149f69a8beb6ecaacf950c8e57 --- /dev/null +++ b/Llama-2-7b-hf_chunk7.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88a1c4e9e3f25b1ec4e4b57b7effd4816ff0645b75014cb32186e4848748a7be +size 303872704 diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a8e1fcd6e9aac86c476bdfef211aba9441a747c --- /dev/null +++ b/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79e263bb20b8a02d650dad2c3eee71ff787829f337aedacb6cd4e1b61c1ce23 +size 791 diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b8a06125e6fd1b3852d5ec0c5ad26c3eda1b22e1 --- /dev/null +++ b/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk8", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/model.mil b/Llama-2-7b-hf_chunk8.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk8.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk8.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk8.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a5ab100b7b2a167e2520f92e51d3193a316a8a4 --- /dev/null +++ b/Llama-2-7b-hf_chunk8.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0072b3a69cac604bd35ef4f9b83c10dfcb19a5eea0a8dac1e2402f4981ea530a +size 303872704 diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin b/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7ea30d8b9b1a6ace9d57a3a4d1e4b9c8ba52f9c --- /dev/null +++ b/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3412284b024b899a736cd77112d4b1a4a5faa19d954259e925ef429f58bd886b +size 243 diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin b/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ad11cfd66dc8c57b5f22d5b34fabfd70ed8347 --- /dev/null +++ b/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589729b2995d8ca8246bbb5d92b910207bab816ad67282b0a285bcd2de77f80e +size 791 diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json b/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..dbc7d27946c8739ff15e79e1d1de5f40df227a2e --- /dev/null +++ b/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json @@ -0,0 +1,218 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Mixed (Float16, Palettized (4 bits))", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache_2", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 18, + "Ios16.rsqrt" : 6, + "Ios16.mul" : 63, + "SliceByIndex" : 12, + "Ios16.constexprLutToDense" : 21, + "Ios16.conv" : 21, + "Ios16.add" : 21, + "Ios16.reduceMean" : 6, + "Ios16.matmul" : 6, + "Ios16.softmax" : 3, + "Ios16.reshape" : 12, + "Ios16.silu" : 3 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 4096, 1, 64]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 512)", + "shortDescription" : "", + "shape" : "[1, 1, 64, 512]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "k_cache_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)?", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "v_cache_2", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_2_7b_hf_2024_05_25_14_03_55_chunk9", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/model.mil b/Llama-2-7b-hf_chunk9.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d5387d44d58aa12214b26cdaf15fcd539841a734 --- /dev/null +++ b/Llama-2-7b-hf_chunk9.mlmodelc/model.mil @@ -0,0 +1,429 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor k_cache_2, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor v_cache_2, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"k_cache_2", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}, {"v_cache_2", 0}})] { + tensor blocks_0_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388736))), name = tensor("blocks_0_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8388864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777536))), name = tensor("blocks_0_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166336))), name = tensor("blocks_0_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25166464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555136))), name = tensor("blocks_0_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33555264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099712))), name = tensor("blocks_0_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56099840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644288))), name = tensor("blocks_0_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_0_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78644416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188864))), name = tensor("blocks_0_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_1_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101188992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577664))), name = tensor("blocks_1_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109577792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966464))), name = tensor("blocks_1_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117966592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355264))), name = tensor("blocks_1_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126355392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744064))), name = tensor("blocks_1_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134744192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288640))), name = tensor("blocks_1_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157288768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833216))), name = tensor("blocks_1_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_1_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179833344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377792))), name = tensor("blocks_1_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor blocks_2_attn_q_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202377920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766592))), name = tensor("blocks_2_attn_q_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_k_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210766720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155392))), name = tensor("blocks_2_attn_k_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_v_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219155520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544192))), name = tensor("blocks_2_attn_v_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_attn_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(227544320))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235932992))), name = tensor("blocks_2_attn_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_1_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235933120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477568))), name = tensor("blocks_2_mlp_fc_1_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_fc_2_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258477696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022144))), name = tensor("blocks_2_mlp_fc_2_weight_palettized_cast_fp16"), shape = tensor([11008, 4096, 1, 1])]; + tensor blocks_2_mlp_proj_weight_palettized_cast_fp16 = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281022272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566720))), name = tensor("blocks_2_mlp_proj_weight_palettized_cast_fp16"), shape = tensor([4096, 11008, 1, 1])]; + tensor var_18 = const()[name = tensor("op_18"), val = tensor(3)]; + tensor var_23 = const()[name = tensor("op_23"), val = tensor(-2)]; + tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_33 = const()[name = tensor("op_33"), val = tensor(true)]; + tensor var_41_cast_fp16 = mul(x = x, y = x)[name = tensor("op_41_cast_fp16")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_mean(axes = var_42, keep_dims = var_33, x = var_41_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45_cast_fp16 = add(x = norm_x_1_cast_fp16, y = var_44_to_fp16)[name = tensor("op_45_cast_fp16")]; + tensor var_46_epsilon_0_to_fp16 = const()[name = tensor("op_46_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_46_cast_fp16 = rsqrt(epsilon = var_46_epsilon_0_to_fp16, x = var_45_cast_fp16)[name = tensor("op_46_cast_fp16")]; + tensor x_normed_1_cast_fp16 = mul(x = x, y = var_46_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303566848)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([1, 1])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1, 1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_62_cast_fp16 = conv(dilations = var_60, groups = var_32, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = blocks_0_attn_q_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor blocks_0_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303575104)))]; + tensor q_1_cast_fp16 = mul(x = var_62_cast_fp16, y = blocks_0_attn_q_proj_output_scales_to_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor([1, 1])]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([1, 1])]; + tensor var_70_pad_type_0 = const()[name = tensor("op_70_pad_type_0"), val = tensor("custom")]; + tensor var_70_pad_0 = const()[name = tensor("op_70_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_70_cast_fp16 = conv(dilations = var_68, groups = var_32, pad = var_70_pad_0, pad_type = var_70_pad_type_0, strides = var_66, weight = blocks_0_attn_k_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_70_cast_fp16")]; + tensor blocks_0_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303583360)))]; + tensor k_1_cast_fp16 = mul(x = var_70_cast_fp16, y = blocks_0_attn_k_proj_output_scales_to_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_74 = const()[name = tensor("op_74"), val = tensor([1, 1])]; + tensor var_76 = const()[name = tensor("op_76"), val = tensor([1, 1])]; + tensor var_78_pad_type_0 = const()[name = tensor("op_78_pad_type_0"), val = tensor("custom")]; + tensor var_78_pad_0 = const()[name = tensor("op_78_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_78_cast_fp16 = conv(dilations = var_76, groups = var_32, pad = var_78_pad_0, pad_type = var_78_pad_type_0, strides = var_74, weight = blocks_0_attn_v_proj_weight_palettized_cast_fp16, x = x_5_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor blocks_0_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303591616)))]; + tensor v_1_cast_fp16 = mul(x = var_78_cast_fp16, y = blocks_0_attn_v_proj_output_scales_to_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 32, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_80, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor([1, 32, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_82, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_0 = reshape(shape = var_84, x = v_1_cast_fp16)[name = tensor("v_3_cast_fp16")]; + tensor var_96_begin_0 = const()[name = tensor("op_96_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_96_end_0 = const()[name = tensor("op_96_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_96_end_mask_0 = const()[name = tensor("op_96_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_96_cast_fp16 = slice_by_index(begin = var_96_begin_0, end = var_96_end_0, end_mask = var_96_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_96_cast_fp16")]; + tensor var_102_begin_0 = const()[name = tensor("op_102_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_102_end_0 = const()[name = tensor("op_102_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_102_end_mask_0 = const()[name = tensor("op_102_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_102_cast_fp16 = slice_by_index(begin = var_102_begin_0, end = var_102_end_0, end_mask = var_102_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor const_3_promoted_to_fp16 = const()[name = tensor("const_3_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_104_cast_fp16 = mul(x = var_102_cast_fp16, y = const_3_promoted_to_fp16)[name = tensor("op_104_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_23, interleave = rotated_1_interleave_0, values = (var_104_cast_fp16, var_96_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_107_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_107_cast_fp16")]; + tensor var_108_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_108_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_107_cast_fp16, y = var_108_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_121_begin_0 = const()[name = tensor("op_121_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_121_end_0 = const()[name = tensor("op_121_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_121_end_mask_0 = const()[name = tensor("op_121_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_121_cast_fp16 = slice_by_index(begin = var_121_begin_0, end = var_121_end_0, end_mask = var_121_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_121_cast_fp16")]; + tensor var_127_begin_0 = const()[name = tensor("op_127_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_127_end_0 = const()[name = tensor("op_127_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_127_end_mask_0 = const()[name = tensor("op_127_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_127_cast_fp16 = slice_by_index(begin = var_127_begin_0, end = var_127_end_0, end_mask = var_127_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_127_cast_fp16")]; + tensor const_5_promoted_to_fp16 = const()[name = tensor("const_5_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_129_cast_fp16 = mul(x = var_127_cast_fp16, y = const_5_promoted_to_fp16)[name = tensor("op_129_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_23, interleave = rotated_3_interleave_0, values = (var_129_cast_fp16, var_121_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_132_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_132_cast_fp16")]; + tensor var_133_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_133_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_132_cast_fp16, y = var_133_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor q_5_interleave_0 = const()[name = tensor("q_5_interleave_0"), val = tensor(false)]; + tensor q_5_cast_fp16 = concat(axis = var_23, interleave = q_5_interleave_0, values = roped_1_cast_fp16)[name = tensor("q_5_cast_fp16")]; + tensor k_5_interleave_0 = const()[name = tensor("k_5_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = concat(axis = var_23, interleave = k_5_interleave_0, values = roped_3_cast_fp16)[name = tensor("k_5_cast_fp16")]; + tensor k_7_interleave_0 = const()[name = tensor("k_7_interleave_0"), val = tensor(false)]; + tensor k_7_cast_fp16 = concat(axis = var_25, interleave = k_7_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_7_cast_fp16")]; + tensor v_5_interleave_0 = const()[name = tensor("v_5_interleave_0"), val = tensor(false)]; + tensor v_5_cast_fp16 = concat(axis = var_25, interleave = v_5_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_5_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_156_cast_fp16 = mul(x = q_5_cast_fp16, y = var_155_to_fp16)[name = tensor("op_156_cast_fp16")]; + tensor attn_weights_1_transpose_x_0 = const()[name = tensor("attn_weights_1_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_1_transpose_y_0 = const()[name = tensor("attn_weights_1_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_1_cast_fp16 = matmul(transpose_x = attn_weights_1_transpose_x_0, transpose_y = attn_weights_1_transpose_y_0, x = var_156_cast_fp16, y = k_7_cast_fp16)[name = tensor("attn_weights_1_cast_fp16")]; + tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = mask)[name = tensor("attn_weights_3_cast_fp16")]; + tensor var_164_cast_fp16 = softmax(axis = var_18, x = attn_weights_3_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = v_5_cast_fp16, y = var_164_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_168 = const()[name = tensor("op_168"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_168, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_172 = const()[name = tensor("op_172"), val = tensor([1, 1])]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176_pad_type_0 = const()[name = tensor("op_176_pad_type_0"), val = tensor("custom")]; + tensor var_176_pad_0 = const()[name = tensor("op_176_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_176_cast_fp16 = conv(dilations = var_174, groups = var_32, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_172, weight = blocks_0_attn_proj_weight_palettized_cast_fp16, x = input_1_cast_fp16)[name = tensor("op_176_cast_fp16")]; + tensor blocks_0_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303599872)))]; + tensor attention_output_1_cast_fp16 = mul(x = var_176_cast_fp16, y = blocks_0_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_11_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_11_cast_fp16")]; + tensor var_185_cast_fp16 = mul(x = x_11_cast_fp16, y = x_11_cast_fp16)[name = tensor("op_185_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_mean(axes = var_186, keep_dims = var_33, x = var_185_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor var_188_to_fp16 = const()[name = tensor("op_188_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_189_cast_fp16 = add(x = norm_x_3_cast_fp16, y = var_188_to_fp16)[name = tensor("op_189_cast_fp16")]; + tensor var_190_epsilon_0_to_fp16 = const()[name = tensor("op_190_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_190_cast_fp16 = rsqrt(epsilon = var_190_epsilon_0_to_fp16, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor x_normed_5_cast_fp16 = mul(x = x_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("x_normed_5_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303608128)))]; + tensor input_3_cast_fp16 = mul(x = x_normed_5_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 1])]; + tensor var_204 = const()[name = tensor("op_204"), val = tensor([1, 1])]; + tensor var_206_pad_type_0 = const()[name = tensor("op_206_pad_type_0"), val = tensor("custom")]; + tensor var_206_pad_0 = const()[name = tensor("op_206_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_206_cast_fp16 = conv(dilations = var_204, groups = var_32, pad = var_206_pad_0, pad_type = var_206_pad_type_0, strides = var_202, weight = blocks_0_mlp_fc_1_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor blocks_0_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303616384)))]; + tensor input_5_cast_fp16 = mul(x = var_206_cast_fp16, y = blocks_0_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, 1])]; + tensor var_212 = const()[name = tensor("op_212"), val = tensor([1, 1])]; + tensor var_214_pad_type_0 = const()[name = tensor("op_214_pad_type_0"), val = tensor("custom")]; + tensor var_214_pad_0 = const()[name = tensor("op_214_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_214_cast_fp16 = conv(dilations = var_212, groups = var_32, pad = var_214_pad_0, pad_type = var_214_pad_type_0, strides = var_210, weight = blocks_0_mlp_fc_2_weight_palettized_cast_fp16, x = input_3_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor blocks_0_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303638464)))]; + tensor x_fc_2_1_cast_fp16 = mul(x = var_214_cast_fp16, y = blocks_0_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_216_cast_fp16 = silu(x = input_5_cast_fp16)[name = tensor("op_216_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_216_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 1])]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 1])]; + tensor var_224_pad_type_0 = const()[name = tensor("op_224_pad_type_0"), val = tensor("custom")]; + tensor var_224_pad_0 = const()[name = tensor("op_224_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_224_cast_fp16 = conv(dilations = var_222, groups = var_32, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_220, weight = blocks_0_mlp_proj_weight_palettized_cast_fp16, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor blocks_0_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303660544)))]; + tensor var_225_cast_fp16 = mul(x = var_224_cast_fp16, y = blocks_0_mlp_proj_output_scales_to_fp16)[name = tensor("op_225_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = var_225_cast_fp16, y = x_11_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor(3)]; + tensor var_237 = const()[name = tensor("op_237"), val = tensor(-2)]; + tensor var_239 = const()[name = tensor("op_239"), val = tensor(-1)]; + tensor var_246 = const()[name = tensor("op_246"), val = tensor(1)]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor(true)]; + tensor var_254_cast_fp16 = mul(x = x_15_cast_fp16, y = x_15_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_255 = const()[name = tensor("op_255"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_mean(axes = var_255, keep_dims = var_247, x = var_254_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = tensor("op_257_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_258_cast_fp16 = add(x = norm_x_5_cast_fp16, y = var_257_to_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_259_epsilon_0_to_fp16 = const()[name = tensor("op_259_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_259_cast_fp16 = rsqrt(epsilon = var_259_epsilon_0_to_fp16, x = var_258_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor x_normed_9_cast_fp16 = mul(x = x_15_cast_fp16, y = var_259_cast_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303668800)))]; + tensor x_19_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276 = const()[name = tensor("op_276"), val = tensor([1, 1])]; + tensor var_278_pad_type_0 = const()[name = tensor("op_278_pad_type_0"), val = tensor("custom")]; + tensor var_278_pad_0 = const()[name = tensor("op_278_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_278_cast_fp16 = conv(dilations = var_276, groups = var_246, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_274, weight = blocks_1_attn_q_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor blocks_1_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303677056)))]; + tensor q_7_cast_fp16 = mul(x = var_278_cast_fp16, y = blocks_1_attn_q_proj_output_scales_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 1])]; + tensor var_284 = const()[name = tensor("op_284"), val = tensor([1, 1])]; + tensor var_286_pad_type_0 = const()[name = tensor("op_286_pad_type_0"), val = tensor("custom")]; + tensor var_286_pad_0 = const()[name = tensor("op_286_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_286_cast_fp16 = conv(dilations = var_284, groups = var_246, pad = var_286_pad_0, pad_type = var_286_pad_type_0, strides = var_282, weight = blocks_1_attn_k_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor blocks_1_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303685312)))]; + tensor k_9_cast_fp16 = mul(x = var_286_cast_fp16, y = blocks_1_attn_k_proj_output_scales_to_fp16)[name = tensor("k_9_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor var_294_pad_type_0 = const()[name = tensor("op_294_pad_type_0"), val = tensor("custom")]; + tensor var_294_pad_0 = const()[name = tensor("op_294_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_294_cast_fp16 = conv(dilations = var_292, groups = var_246, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_290, weight = blocks_1_attn_v_proj_weight_palettized_cast_fp16, x = x_19_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor blocks_1_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303693568)))]; + tensor v_7_cast_fp16 = mul(x = var_294_cast_fp16, y = blocks_1_attn_v_proj_output_scales_to_fp16)[name = tensor("v_7_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor([1, 32, 128, 64])]; + tensor q_9_cast_fp16 = reshape(shape = var_296, x = q_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 32, 128, 64])]; + tensor k_11_cast_fp16 = reshape(shape = var_298, x = k_9_cast_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_1 = reshape(shape = var_300, x = v_7_cast_fp16)[name = tensor("v_9_cast_fp16")]; + tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_320_cast_fp16 = mul(x = var_318_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_320_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_237, interleave = rotated_5_interleave_0, values = (var_320_cast_fp16, var_312_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_323_cast_fp16 = mul(x = q_9_cast_fp16, y = cos)[name = tensor("op_323_cast_fp16")]; + tensor var_324_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_324_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_323_cast_fp16, y = var_324_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_343_begin_0 = const()[name = tensor("op_343_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_343_end_0 = const()[name = tensor("op_343_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_343_end_mask_0 = const()[name = tensor("op_343_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_343_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_345_cast_fp16 = mul(x = var_343_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_345_cast_fp16")]; + tensor rotated_7_interleave_0 = const()[name = tensor("rotated_7_interleave_0"), val = tensor(false)]; + tensor rotated_7_cast_fp16 = concat(axis = var_237, interleave = rotated_7_interleave_0, values = (var_345_cast_fp16, var_337_cast_fp16))[name = tensor("rotated_7_cast_fp16")]; + tensor var_348_cast_fp16 = mul(x = k_11_cast_fp16, y = cos)[name = tensor("op_348_cast_fp16")]; + tensor var_349_cast_fp16 = mul(x = rotated_7_cast_fp16, y = sin)[name = tensor("op_349_cast_fp16")]; + tensor roped_7_cast_fp16 = add(x = var_348_cast_fp16, y = var_349_cast_fp16)[name = tensor("roped_7_cast_fp16")]; + tensor q_11_interleave_0 = const()[name = tensor("q_11_interleave_0"), val = tensor(false)]; + tensor q_11_cast_fp16 = concat(axis = var_237, interleave = q_11_interleave_0, values = roped_5_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor k_13_interleave_0 = const()[name = tensor("k_13_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = concat(axis = var_237, interleave = k_13_interleave_0, values = roped_7_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor k_15_interleave_0 = const()[name = tensor("k_15_interleave_0"), val = tensor(false)]; + tensor k_15_cast_fp16 = concat(axis = var_239, interleave = k_15_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_15_cast_fp16")]; + tensor v_11_interleave_0 = const()[name = tensor("v_11_interleave_0"), val = tensor(false)]; + tensor v_11_cast_fp16 = concat(axis = var_239, interleave = v_11_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_11_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_372_cast_fp16 = mul(x = q_11_cast_fp16, y = var_371_to_fp16)[name = tensor("op_372_cast_fp16")]; + tensor attn_weights_5_transpose_x_0 = const()[name = tensor("attn_weights_5_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_5_transpose_y_0 = const()[name = tensor("attn_weights_5_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_5_cast_fp16 = matmul(transpose_x = attn_weights_5_transpose_x_0, transpose_y = attn_weights_5_transpose_y_0, x = var_372_cast_fp16, y = k_15_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; + tensor attn_weights_7_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = mask)[name = tensor("attn_weights_7_cast_fp16")]; + tensor var_380_cast_fp16 = softmax(axis = var_232, x = attn_weights_7_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = v_11_cast_fp16, y = var_380_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_384 = const()[name = tensor("op_384"), val = tensor([1, 4096, 1, -1])]; + tensor input_9_cast_fp16 = reshape(shape = var_384, x = attn_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_388 = const()[name = tensor("op_388"), val = tensor([1, 1])]; + tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, 1])]; + tensor var_392_pad_type_0 = const()[name = tensor("op_392_pad_type_0"), val = tensor("custom")]; + tensor var_392_pad_0 = const()[name = tensor("op_392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_392_cast_fp16 = conv(dilations = var_390, groups = var_246, pad = var_392_pad_0, pad_type = var_392_pad_type_0, strides = var_388, weight = blocks_1_attn_proj_weight_palettized_cast_fp16, x = input_9_cast_fp16)[name = tensor("op_392_cast_fp16")]; + tensor blocks_1_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303701824)))]; + tensor attention_output_3_cast_fp16 = mul(x = var_392_cast_fp16, y = blocks_1_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_3_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = attention_output_3_cast_fp16, y = x_15_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_401_cast_fp16 = mul(x = x_25_cast_fp16, y = x_25_cast_fp16)[name = tensor("op_401_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1])]; + tensor norm_x_7_cast_fp16 = reduce_mean(axes = var_402, keep_dims = var_247, x = var_401_cast_fp16)[name = tensor("norm_x_7_cast_fp16")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = norm_x_7_cast_fp16, y = var_404_to_fp16)[name = tensor("op_405_cast_fp16")]; + tensor var_406_epsilon_0_to_fp16 = const()[name = tensor("op_406_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0_to_fp16, x = var_405_cast_fp16)[name = tensor("op_406_cast_fp16")]; + tensor x_normed_13_cast_fp16 = mul(x = x_25_cast_fp16, y = var_406_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303710080)))]; + tensor input_11_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 1])]; + tensor var_420 = const()[name = tensor("op_420"), val = tensor([1, 1])]; + tensor var_422_pad_type_0 = const()[name = tensor("op_422_pad_type_0"), val = tensor("custom")]; + tensor var_422_pad_0 = const()[name = tensor("op_422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_cast_fp16 = conv(dilations = var_420, groups = var_246, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_418, weight = blocks_1_mlp_fc_1_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_422_cast_fp16")]; + tensor blocks_1_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303718336)))]; + tensor input_13_cast_fp16 = mul(x = var_422_cast_fp16, y = blocks_1_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 1])]; + tensor var_428 = const()[name = tensor("op_428"), val = tensor([1, 1])]; + tensor var_430_pad_type_0 = const()[name = tensor("op_430_pad_type_0"), val = tensor("custom")]; + tensor var_430_pad_0 = const()[name = tensor("op_430_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_430_cast_fp16 = conv(dilations = var_428, groups = var_246, pad = var_430_pad_0, pad_type = var_430_pad_type_0, strides = var_426, weight = blocks_1_mlp_fc_2_weight_palettized_cast_fp16, x = input_11_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor blocks_1_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303740416)))]; + tensor x_fc_2_3_cast_fp16 = mul(x = var_430_cast_fp16, y = blocks_1_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_3_cast_fp16")]; + tensor var_432_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("op_432_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_432_cast_fp16, y = x_fc_2_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor var_438 = const()[name = tensor("op_438"), val = tensor([1, 1])]; + tensor var_440_pad_type_0 = const()[name = tensor("op_440_pad_type_0"), val = tensor("custom")]; + tensor var_440_pad_0 = const()[name = tensor("op_440_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_440_cast_fp16 = conv(dilations = var_438, groups = var_246, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_436, weight = blocks_1_mlp_proj_weight_palettized_cast_fp16, x = input_15_cast_fp16)[name = tensor("op_440_cast_fp16")]; + tensor blocks_1_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303762496)))]; + tensor var_441_cast_fp16 = mul(x = var_440_cast_fp16, y = blocks_1_mlp_proj_output_scales_to_fp16)[name = tensor("op_441_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = var_441_cast_fp16, y = x_25_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_448 = const()[name = tensor("op_448"), val = tensor(3)]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor(-2)]; + tensor var_455 = const()[name = tensor("op_455"), val = tensor(-1)]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor(1)]; + tensor var_463 = const()[name = tensor("op_463"), val = tensor(true)]; + tensor var_470_cast_fp16 = mul(x = x_29_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_471 = const()[name = tensor("op_471"), val = tensor([1])]; + tensor norm_x_9_cast_fp16 = reduce_mean(axes = var_471, keep_dims = var_463, x = var_470_cast_fp16)[name = tensor("norm_x_9_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_474_cast_fp16 = add(x = norm_x_9_cast_fp16, y = var_473_to_fp16)[name = tensor("op_474_cast_fp16")]; + tensor var_475_epsilon_0_to_fp16 = const()[name = tensor("op_475_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_475_cast_fp16 = rsqrt(epsilon = var_475_epsilon_0_to_fp16, x = var_474_cast_fp16)[name = tensor("op_475_cast_fp16")]; + tensor x_normed_17_cast_fp16 = mul(x = x_29_cast_fp16, y = var_475_cast_fp16)[name = tensor("x_normed_17_cast_fp16")]; + tensor blocks_2_norm_1_weight_to_fp16 = const()[name = tensor("blocks_2_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303770752)))]; + tensor x_33_cast_fp16 = mul(x = x_normed_17_cast_fp16, y = blocks_2_norm_1_weight_to_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_490 = const()[name = tensor("op_490"), val = tensor([1, 1])]; + tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 1])]; + tensor var_494_pad_type_0 = const()[name = tensor("op_494_pad_type_0"), val = tensor("custom")]; + tensor var_494_pad_0 = const()[name = tensor("op_494_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_494_cast_fp16 = conv(dilations = var_492, groups = var_462, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_490, weight = blocks_2_attn_q_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor blocks_2_attn_q_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_q_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303779008)))]; + tensor q_13_cast_fp16 = mul(x = var_494_cast_fp16, y = blocks_2_attn_q_proj_output_scales_to_fp16)[name = tensor("q_13_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 1])]; + tensor var_500 = const()[name = tensor("op_500"), val = tensor([1, 1])]; + tensor var_502_pad_type_0 = const()[name = tensor("op_502_pad_type_0"), val = tensor("custom")]; + tensor var_502_pad_0 = const()[name = tensor("op_502_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_502_cast_fp16 = conv(dilations = var_500, groups = var_462, pad = var_502_pad_0, pad_type = var_502_pad_type_0, strides = var_498, weight = blocks_2_attn_k_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor blocks_2_attn_k_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_k_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303787264)))]; + tensor k_17_cast_fp16 = mul(x = var_502_cast_fp16, y = blocks_2_attn_k_proj_output_scales_to_fp16)[name = tensor("k_17_cast_fp16")]; + tensor var_506 = const()[name = tensor("op_506"), val = tensor([1, 1])]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor([1, 1])]; + tensor var_510_pad_type_0 = const()[name = tensor("op_510_pad_type_0"), val = tensor("custom")]; + tensor var_510_pad_0 = const()[name = tensor("op_510_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_510_cast_fp16 = conv(dilations = var_508, groups = var_462, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_506, weight = blocks_2_attn_v_proj_weight_palettized_cast_fp16, x = x_33_cast_fp16)[name = tensor("op_510_cast_fp16")]; + tensor blocks_2_attn_v_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_v_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303795520)))]; + tensor v_13_cast_fp16 = mul(x = var_510_cast_fp16, y = blocks_2_attn_v_proj_output_scales_to_fp16)[name = tensor("v_13_cast_fp16")]; + tensor var_512 = const()[name = tensor("op_512"), val = tensor([1, 32, 128, 64])]; + tensor q_15_cast_fp16 = reshape(shape = var_512, x = q_13_cast_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_514 = const()[name = tensor("op_514"), val = tensor([1, 32, 128, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_514, x = k_17_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_516 = const()[name = tensor("op_516"), val = tensor([1, 32, 128, 64])]; + tensor new_v_cache_2 = reshape(shape = var_516, x = v_13_cast_fp16)[name = tensor("v_15_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_534_cast_fp16")]; + tensor const_17_promoted_to_fp16 = const()[name = tensor("const_17_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_536_cast_fp16 = mul(x = var_534_cast_fp16, y = const_17_promoted_to_fp16)[name = tensor("op_536_cast_fp16")]; + tensor rotated_9_interleave_0 = const()[name = tensor("rotated_9_interleave_0"), val = tensor(false)]; + tensor rotated_9_cast_fp16 = concat(axis = var_453, interleave = rotated_9_interleave_0, values = (var_536_cast_fp16, var_528_cast_fp16))[name = tensor("rotated_9_cast_fp16")]; + tensor var_539_cast_fp16 = mul(x = q_15_cast_fp16, y = cos)[name = tensor("op_539_cast_fp16")]; + tensor var_540_cast_fp16 = mul(x = rotated_9_cast_fp16, y = sin)[name = tensor("op_540_cast_fp16")]; + tensor roped_9_cast_fp16 = add(x = var_539_cast_fp16, y = var_540_cast_fp16)[name = tensor("roped_9_cast_fp16")]; + tensor var_553_begin_0 = const()[name = tensor("op_553_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_553_end_0 = const()[name = tensor("op_553_end_0"), val = tensor([1, 32, 64, 64])]; + tensor var_553_end_mask_0 = const()[name = tensor("op_553_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_553_cast_fp16 = slice_by_index(begin = var_553_begin_0, end = var_553_end_0, end_mask = var_553_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_559_begin_0 = const()[name = tensor("op_559_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_559_end_0 = const()[name = tensor("op_559_end_0"), val = tensor([1, 32, 128, 64])]; + tensor var_559_end_mask_0 = const()[name = tensor("op_559_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_559_cast_fp16 = slice_by_index(begin = var_559_begin_0, end = var_559_end_0, end_mask = var_559_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_559_cast_fp16")]; + tensor const_19_promoted_to_fp16 = const()[name = tensor("const_19_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = const_19_promoted_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_453, interleave = rotated_interleave_0, values = (var_561_cast_fp16, var_553_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_564_cast_fp16 = mul(x = k_19_cast_fp16, y = cos)[name = tensor("op_564_cast_fp16")]; + tensor var_565_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_565_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_564_cast_fp16, y = var_565_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor q_interleave_0 = const()[name = tensor("q_interleave_0"), val = tensor(false)]; + tensor q_cast_fp16 = concat(axis = var_453, interleave = q_interleave_0, values = roped_9_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor k_21_interleave_0 = const()[name = tensor("k_21_interleave_0"), val = tensor(false)]; + tensor new_k_cache_2 = concat(axis = var_453, interleave = k_21_interleave_0, values = roped_cast_fp16)[name = tensor("k_21_cast_fp16")]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor k_cast_fp16 = concat(axis = var_455, interleave = k_interleave_0, values = (k_cache_2, new_k_cache_2))[name = tensor("k_cast_fp16")]; + tensor v_interleave_0 = const()[name = tensor("v_interleave_0"), val = tensor(false)]; + tensor v_cast_fp16 = concat(axis = var_455, interleave = v_interleave_0, values = (v_cache_2, new_v_cache_2))[name = tensor("v_cast_fp16")]; + tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_588_cast_fp16 = mul(x = q_cast_fp16, y = var_587_to_fp16)[name = tensor("op_588_cast_fp16")]; + tensor attn_weights_9_transpose_x_0 = const()[name = tensor("attn_weights_9_transpose_x_0"), val = tensor(true)]; + tensor attn_weights_9_transpose_y_0 = const()[name = tensor("attn_weights_9_transpose_y_0"), val = tensor(false)]; + tensor attn_weights_9_cast_fp16 = matmul(transpose_x = attn_weights_9_transpose_x_0, transpose_y = attn_weights_9_transpose_y_0, x = var_588_cast_fp16, y = k_cast_fp16)[name = tensor("attn_weights_9_cast_fp16")]; + tensor attn_weights_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = mask)[name = tensor("attn_weights_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_448, x = attn_weights_cast_fp16)[name = tensor("op_596_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = v_cast_fp16, y = var_596_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 4096, 1, -1])]; + tensor input_17_cast_fp16 = reshape(shape = var_600, x = attn_5_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608_cast_fp16 = conv(dilations = var_606, groups = var_462, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = blocks_2_attn_proj_weight_palettized_cast_fp16, x = input_17_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor blocks_2_attn_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_attn_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303803776)))]; + tensor attention_output_cast_fp16 = mul(x = var_608_cast_fp16, y = blocks_2_attn_proj_output_scales_to_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = attention_output_cast_fp16, y = x_29_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_617_cast_fp16 = mul(x = x_39_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_mean(axes = var_618, keep_dims = var_463, x = var_617_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_621_cast_fp16 = add(x = norm_x_cast_fp16, y = var_620_to_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_622_epsilon_0_to_fp16 = const()[name = tensor("op_622_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_622_cast_fp16 = rsqrt(epsilon = var_622_epsilon_0_to_fp16, x = var_621_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor x_normed_21_cast_fp16 = mul(x = x_39_cast_fp16, y = var_622_cast_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_2_norm_2_weight_to_fp16 = const()[name = tensor("blocks_2_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303812032)))]; + tensor input_19_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_2_norm_2_weight_to_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_634 = const()[name = tensor("op_634"), val = tensor([1, 1])]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 1])]; + tensor var_638_pad_type_0 = const()[name = tensor("op_638_pad_type_0"), val = tensor("custom")]; + tensor var_638_pad_0 = const()[name = tensor("op_638_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_638_cast_fp16 = conv(dilations = var_636, groups = var_462, pad = var_638_pad_0, pad_type = var_638_pad_type_0, strides = var_634, weight = blocks_2_mlp_fc_1_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_638_cast_fp16")]; + tensor blocks_2_mlp_fc_1_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_1_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303820288)))]; + tensor input_21_cast_fp16 = mul(x = var_638_cast_fp16, y = blocks_2_mlp_fc_1_output_scales_to_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor([1, 1])]; + tensor var_644 = const()[name = tensor("op_644"), val = tensor([1, 1])]; + tensor var_646_pad_type_0 = const()[name = tensor("op_646_pad_type_0"), val = tensor("custom")]; + tensor var_646_pad_0 = const()[name = tensor("op_646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_646_cast_fp16 = conv(dilations = var_644, groups = var_462, pad = var_646_pad_0, pad_type = var_646_pad_type_0, strides = var_642, weight = blocks_2_mlp_fc_2_weight_palettized_cast_fp16, x = input_19_cast_fp16)[name = tensor("op_646_cast_fp16")]; + tensor blocks_2_mlp_fc_2_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_fc_2_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303842368)))]; + tensor x_fc_2_cast_fp16 = mul(x = var_646_cast_fp16, y = blocks_2_mlp_fc_2_output_scales_to_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_648_cast_fp16 = silu(x = input_21_cast_fp16)[name = tensor("op_648_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_648_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_652 = const()[name = tensor("op_652"), val = tensor([1, 1])]; + tensor var_654 = const()[name = tensor("op_654"), val = tensor([1, 1])]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("custom")]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_cast_fp16 = conv(dilations = var_654, groups = var_462, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_652, weight = blocks_2_mlp_proj_weight_palettized_cast_fp16, x = input_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor blocks_2_mlp_proj_output_scales_to_fp16 = const()[name = tensor("blocks_2_mlp_proj_output_scales_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303864448)))]; + tensor var_657_cast_fp16 = mul(x = var_656_cast_fp16, y = blocks_2_mlp_proj_output_scales_to_fp16)[name = tensor("op_657_cast_fp16")]; + tensor new_x = add(x = var_657_cast_fp16, y = x_39_cast_fp16)[name = tensor("op_658_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_k_cache_1, new_k_cache_2, new_v_cache_0, new_v_cache_1, new_v_cache_2); +} \ No newline at end of file diff --git a/Llama-2-7b-hf_chunk9.mlmodelc/weights/weight.bin b/Llama-2-7b-hf_chunk9.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..271764a94ff3da74677061db9896a4da55baeeca --- /dev/null +++ b/Llama-2-7b-hf_chunk9.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1aa227b7ef525860924ae4fb4768a8514bfc00a4226c543aba73f22f68c656 +size 303872704 diff --git a/generation-cache-processor.mlmodelc/analytics/coremldata.bin b/generation-cache-processor.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..d37d971fd6791ba6ac3c0001f54eb9a8cf6af39a --- /dev/null +++ b/generation-cache-processor.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c961b8e84308365368c6798dfa3395fd75fc5caf4efe68d2362b4cc93bb7602 +size 243 diff --git a/generation-cache-processor.mlmodelc/coremldata.bin b/generation-cache-processor.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..934d5fe36b997a2711ad9e52cec88f23f97a82e7 --- /dev/null +++ b/generation-cache-processor.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2847ce6c3125df3cdc89c7e7762d1bd4e40ed33c31e8b58c49bffbd795e10ec +size 520 diff --git a/generation-cache-processor.mlmodelc/metadata.json b/generation-cache-processor.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..e418bcb5abba3fccc3160e04c17a8bf7cbae94da --- /dev/null +++ b/generation-cache-processor.mlmodelc/metadata.json @@ -0,0 +1,109 @@ +[ + { + "metadataOutputVersion" : "3.0", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "generation_k_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "generation_v_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "ignore_me_im_only_here_so_this_runs_on_the_ane", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "SliceByIndex" : 2, + "Ios16.mul" : 1, + "Concat" : 2, + "Ios16.reduceMin" : 1 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.3.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "old_k_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_k_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 448)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 448]", + "name" : "old_v_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 64)", + "shortDescription" : "", + "shape" : "[1, 32, 128, 64]", + "name" : "new_v_cache", + "type" : "MultiArray" + } + ], + "generatedClassName" : "generation_cache_processor", + "method" : "predict" + } +] \ No newline at end of file diff --git a/generation-cache-processor.mlmodelc/model.mil b/generation-cache-processor.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..de506f6cacb07dee288aa8cd31c61cd28f439dde --- /dev/null +++ b/generation-cache-processor.mlmodelc/model.mil @@ -0,0 +1,24 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.3.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor new_k_cache, tensor new_v_cache, tensor old_k_cache, tensor old_v_cache) { + tensor var_6 = const()[name = tensor("op_6"), val = tensor(-1)]; + tensor cat_k_1_interleave_0 = const()[name = tensor("cat_k_1_interleave_0"), val = tensor(false)]; + tensor cat_k_1_cast_fp16 = concat(axis = var_6, interleave = cat_k_1_interleave_0, values = (old_k_cache, new_k_cache))[name = tensor("cat_k_1_cast_fp16")]; + tensor var_9 = const()[name = tensor("op_9"), val = tensor(-1)]; + tensor cat_v_interleave_0 = const()[name = tensor("cat_v_interleave_0"), val = tensor(false)]; + tensor cat_v_cast_fp16 = concat(axis = var_9, interleave = cat_v_interleave_0, values = (old_v_cache, new_v_cache))[name = tensor("cat_v_cast_fp16")]; + tensor cat_k_begin_0 = const()[name = tensor("cat_k_begin_0"), val = tensor([0, 0, 0, 1])]; + tensor cat_k_end_0 = const()[name = tensor("cat_k_end_0"), val = tensor([1, 32, 128, 449])]; + tensor cat_k_end_mask_0 = const()[name = tensor("cat_k_end_mask_0"), val = tensor([true, true, true, false])]; + tensor generation_k_cache = slice_by_index(begin = cat_k_begin_0, end = cat_k_end_0, end_mask = cat_k_end_mask_0, x = cat_k_1_cast_fp16)[name = tensor("cat_k_cast_fp16")]; + tensor var_50_begin_0 = const()[name = tensor("op_50_begin_0"), val = tensor([0, 0, 0, 1])]; + tensor var_50_end_0 = const()[name = tensor("op_50_end_0"), val = tensor([1, 32, 128, 449])]; + tensor var_50_end_mask_0 = const()[name = tensor("op_50_end_mask_0"), val = tensor([true, true, true, false])]; + tensor generation_v_cache = slice_by_index(begin = var_50_begin_0, end = var_50_end_0, end_mask = var_50_end_mask_0, x = cat_v_cast_fp16)[name = tensor("op_50_cast_fp16")]; + tensor var_51_promoted_to_fp16 = const()[name = tensor("op_51_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor prod_cast_fp16 = mul(x = generation_k_cache, y = var_51_promoted_to_fp16)[name = tensor("prod_cast_fp16")]; + tensor var_53_keep_dims_0 = const()[name = tensor("op_53_keep_dims_0"), val = tensor(false)]; + tensor ignore_me_im_only_here_so_this_runs_on_the_ane = reduce_min(keep_dims = var_53_keep_dims_0, x = prod_cast_fp16)[name = tensor("op_53_cast_fp16")]; + } -> (generation_k_cache, generation_v_cache, ignore_me_im_only_here_so_this_runs_on_the_ane); +} \ No newline at end of file diff --git a/logit-processor.mlmodelc/analytics/coremldata.bin b/logit-processor.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..d407de484c78aebeccfa508c0d0d3d77438f2de8 --- /dev/null +++ b/logit-processor.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13535ae56a04b77d84a922d0ff10bc425569df59eca00e4d93fcdc2c995c8c34 +size 243 diff --git a/logit-processor.mlmodelc/coremldata.bin b/logit-processor.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..f0cf4b0b8ee474c2b0a9b82c8bf7319a7edea5e7 --- /dev/null +++ b/logit-processor.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d87990cc182dc5d1a7fbc0b6161cd300a7b8c59b6488ffd941c21f86c7db6ea +size 311 diff --git a/logit-processor.mlmodelc/metadata.json b/logit-processor.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..ca20ac850d0091b1ce07cc347c2a8e7988cb474b --- /dev/null +++ b/logit-processor.mlmodelc/metadata.json @@ -0,0 +1,56 @@ +[ + { + "metadataOutputVersion" : "3.0", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 64]", + "name" : "argmax", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Ios16.reduceArgmax" : 1 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "7.2" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 32000)", + "shortDescription" : "", + "shape" : "[1, 64, 32000]", + "name" : "logits", + "type" : "MultiArray" + } + ], + "generatedClassName" : "logit_processor", + "method" : "predict" + } +] \ No newline at end of file diff --git a/logit-processor.mlmodelc/model.mil b/logit-processor.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..d49b081fc3052d7e05171ccc76ac7ee373b1e2e8 --- /dev/null +++ b/logit-processor.mlmodelc/model.mil @@ -0,0 +1,9 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor logits) { + tensor var_2 = const()[name = tensor("op_2"), val = tensor(-1)]; + tensor var_3 = const()[name = tensor("op_3"), val = tensor(false)]; + tensor argmax = reduce_argmax(axis = var_2, keep_dims = var_3, x = logits)[name = tensor("op_4_cast_fp16")]; + } -> (argmax); +} \ No newline at end of file