smpanaro
/

Llama-2-7b-coreml

Update Sonoma model with faster 8x8 conv and split einsum attention

dba673f 2 months ago

1.35 kB

	program(1.0)
	[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})]
	{
	func main<ios16>(tensor<fp16, [1, ?, 32000]> logits) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>>>((("DefaultShapes", {{"logits", [1, 511, 32000]}}), ("EnumeratedShapes", {{"logits_1_1_1_1_32000_", {{"logits", [1, 1, 32000]}}}, {"logits_1_1_1_2_32000_", {{"logits", [1, 2, 32000]}}}, {"logits_1_1_1_4_32000_", {{"logits", [1, 4, 32000]}}}, {"logits_1_1_1_511_32000_", {{"logits", [1, 511, 32000]}}}, {"logits_1_1_1_512_32000_", {{"logits", [1, 512, 32000]}}}, {"logits_1_1_1_64_32000_", {{"logits", [1, 64, 32000]}}}})))] {
	tensor<int32, []> var_2 = const()[name = tensor<string, []>("op_2"), val = tensor<int32, []>(-1)];
	tensor<bool, []> var_3 = const()[name = tensor<string, []>("op_3"), val = tensor<bool, []>(false)];
	tensor<int32, [1, ?]> argmax = reduce_argmax(axis = var_2, keep_dims = var_3, x = logits)[name = tensor<string, []>("op_4_cast_fp16")];
	} -> (argmax);
	}