[general]
version = "0.0.1"

# Torch extension: binding sources and the Python package root.
[torch]
name = "paged_attention"
src = [
  "torch-ext/registration.h",
  "torch-ext/torch_binding.cpp",
  "torch-ext/torch_binding.h"
]
pyroot = "torch-ext"

# Shared CUDA helpers, built for the same compute capabilities as the
# attention kernels (Volta 7.0 through Hopper 9.0).
[kernel.cuda_utils]
capabilities = [ "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0" ]
src = [
  "cuda-utils/cuda_utils_kernels.cu",
]
depends = []

# Paged-attention kernels (v1 and v2), cache kernels, and FP8
# quantization utilities for both AMD and NVIDIA backends.
[kernel.paged_attention]
capabilities = [ "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0" ]
src = [
  "paged-attention/attention/attention_dtypes.h",
  "paged-attention/attention/attention_generic.cuh",
  "paged-attention/attention/attention_kernels.cuh",
  "paged-attention/attention/attention_utils.cuh",
  "paged-attention/attention/dtype_bfloat16.cuh",
  "paged-attention/attention/dtype_float16.cuh",
  "paged-attention/attention/dtype_float32.cuh",
  "paged-attention/attention/dtype_fp8.cuh",
  "paged-attention/attention/paged_attention_v1.cu",
  "paged-attention/attention/paged_attention_v2.cu",
  "paged-attention/cache_kernels.cu",
  "paged-attention/cuda_compat.h",
  "paged-attention/dispatch_utils.h",
  "paged-attention/quantization/fp8/amd/hip_float8.h",
  "paged-attention/quantization/fp8/amd/hip_float8_impl.h",
  "paged-attention/quantization/fp8/amd/quant_utils.cuh",
  "paged-attention/quantization/fp8/nvidia/quant_utils.cuh",
]
include = [ "." ]
depends = [ "torch" ]