danieldk (HF staff) committed
Commit 3dcba92 · 1 Parent(s): 20100e6

Rename to paged-attention

build.toml CHANGED
@@ -2,7 +2,7 @@
 version = "0.0.1"

 [torch]
-name = "attention"
+name = "paged_attention"
 src = [
   "torch-ext/registration.h",
   "torch-ext/torch_binding.cpp",
tests/kernels/conftest.py CHANGED
@@ -1,6 +1,6 @@
 from typing import List, Optional, Tuple, Union

-import attention as ops
+import paged_attention as ops
 import pytest
 import torch

@@ -41,7 +41,7 @@ def create_kv_caches_with_random(
         raise ValueError(
             f"Does not support key cache of type fp8 with head_size {head_size}"
         )
-    from attention.platforms import current_platform
+    from paged_attention.platforms import current_platform

     current_platform.seed_everything(seed)

@@ -88,7 +88,7 @@ def create_kv_caches_with_random_flash(
     seed: int = 0,
     device: Optional[str] = "cuda",
 ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
-    from attention.platforms import current_platform
+    from paged_attention.platforms import current_platform

     current_platform.seed_everything(seed)

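`current_platform.seed_everything(seed)` comes from the repo's own `paged_attention/platforms.py` (renamed but otherwise untouched by this commit). For orientation only, here is a hedged sketch of what such a seeding helper conventionally does; it is not this repo's implementation:

import random

import numpy as np
import torch


def seed_everything(seed: int) -> None:
    """Conventional all-sources seeding; illustrative only."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
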
tests/kernels/test_attention.py CHANGED
@@ -1,10 +1,10 @@
 import random
 from typing import List, Optional, Tuple

-import attention as ops
+import paged_attention as ops
 import pytest
 import torch
-from attention.platforms import current_platform
+from paged_attention.platforms import current_platform

 from .allclose_default import get_default_atol, get_default_rtol
 from .utils import get_max_shared_memory_bytes, opcheck
tests/kernels/test_cache.py CHANGED
@@ -1,10 +1,10 @@
 import random
 from typing import List, Tuple

-import attention as ops
+import paged_attention as ops
 import pytest
 import torch
-from attention.platforms import current_platform
+from paged_attention.platforms import current_platform

 from .utils import DEFAULT_OPCHECK_TEST_UTILS, opcheck

tests/kernels/utils.py CHANGED
@@ -83,7 +83,7 @@ def opcheck(
 @lru_cache(maxsize=None)
 def get_max_shared_memory_bytes(gpu: int = 0) -> int:
     """Returns the maximum shared memory per thread block in bytes."""
-    from attention import ops
+    from paged_attention import ops

     max_shared_mem = ops.get_max_shared_memory_per_block_device_attribute(gpu)
     # value 0 will cause MAX_SEQ_LEN become negative and test_attention.py
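
The trailing comment alludes to the attention tests deriving a maximum sequence length from this shared-memory figure. A hedged sketch of that derivation, using only the op referenced above (the constant names and the 512-element buffer are illustrative, not taken from this diff):

import torch

from paged_attention import ops

FLOAT32_BYTES = torch.finfo(torch.float).bits // 8  # 4 bytes per float32
max_shared_mem = ops.get_max_shared_memory_per_block_device_attribute(0)
assert max_shared_mem > 0, "a zero attribute would make MAX_SEQ_LEN negative"
MAX_SEQ_LEN = max_shared_mem // FLOAT32_BYTES - 512  # leave a small headroom buffer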
torch-ext/{attention → paged_attention}/__init__.py RENAMED
File without changes
torch-ext/{attention → paged_attention}/_custom_ops.py RENAMED
File without changes
torch-ext/{attention → paged_attention}/platforms.py RENAMED
File without changes