Upload handler.py
Browse files- handler.py +7 -7
handler.py
CHANGED
@@ -9,7 +9,7 @@ import torch
|
|
9 |
from torchao.quantization import quantize_, autoquant, int8_dynamic_activation_int8_weight
|
10 |
from huggingface_hub import hf_hub_download
|
11 |
|
12 |
-
IS_COMPILE =
|
13 |
IS_TURBO = False
|
14 |
|
15 |
if IS_COMPILE:
|
@@ -34,9 +34,9 @@ def load_pipeline_compile(repo_id: str, dtype: torch.dtype) -> Any:
|
|
34 |
pipe.transformer.fuse_qkv_projections()
|
35 |
pipe.vae.fuse_qkv_projections()
|
36 |
pipe.transformer.to(memory_format=torch.channels_last)
|
37 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="
|
38 |
pipe.vae.to(memory_format=torch.channels_last)
|
39 |
-
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="
|
40 |
pipe.to("cuda")
|
41 |
return pipe
|
42 |
|
@@ -45,9 +45,9 @@ def load_pipeline_autoquant(repo_id: str, dtype: torch.dtype) -> Any:
|
|
45 |
pipe.transformer.fuse_qkv_projections()
|
46 |
pipe.vae.fuse_qkv_projections()
|
47 |
pipe.transformer.to(memory_format=torch.channels_last)
|
48 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
49 |
pipe.vae.to(memory_format=torch.channels_last)
|
50 |
-
pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=True)
|
51 |
pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)
|
52 |
pipe.vae = autoquant(pipe.vae, error_on_unseen=False)
|
53 |
pipe.to("cuda")
|
@@ -75,9 +75,9 @@ def load_pipeline_turbo_compile(repo_id: str, dtype: torch.dtype) -> Any:
|
|
75 |
quantize_(pipe.transformer, int8_dynamic_activation_int8_weight(), device="cuda")
|
76 |
quantize_(pipe.vae, int8_dynamic_activation_int8_weight(), device="cuda")
|
77 |
pipe.transformer.to(memory_format=torch.channels_last)
|
78 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="
|
79 |
pipe.vae.to(memory_format=torch.channels_last)
|
80 |
-
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="
|
81 |
pipe.to("cuda")
|
82 |
return pipe
|
83 |
|
|
|
9 |
from torchao.quantization import quantize_, autoquant, int8_dynamic_activation_int8_weight
|
10 |
from huggingface_hub import hf_hub_download
|
11 |
|
12 |
+
IS_COMPILE = True
|
13 |
IS_TURBO = False
|
14 |
|
15 |
if IS_COMPILE:
|
|
|
34 |
pipe.transformer.fuse_qkv_projections()
|
35 |
pipe.vae.fuse_qkv_projections()
|
36 |
pipe.transformer.to(memory_format=torch.channels_last)
|
37 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="eager")
|
38 |
pipe.vae.to(memory_format=torch.channels_last)
|
39 |
+
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="eager")
|
40 |
pipe.to("cuda")
|
41 |
return pipe
|
42 |
|
|
|
45 |
pipe.transformer.fuse_qkv_projections()
|
46 |
pipe.vae.fuse_qkv_projections()
|
47 |
pipe.transformer.to(memory_format=torch.channels_last)
|
48 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True, backend="eager")
|
49 |
pipe.vae.to(memory_format=torch.channels_last)
|
50 |
+
pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=True, backend="eager")
|
51 |
pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)
|
52 |
pipe.vae = autoquant(pipe.vae, error_on_unseen=False)
|
53 |
pipe.to("cuda")
|
|
|
75 |
quantize_(pipe.transformer, int8_dynamic_activation_int8_weight(), device="cuda")
|
76 |
quantize_(pipe.vae, int8_dynamic_activation_int8_weight(), device="cuda")
|
77 |
pipe.transformer.to(memory_format=torch.channels_last)
|
78 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="eager")
|
79 |
pipe.vae.to(memory_format=torch.channels_last)
|
80 |
+
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="eager")
|
81 |
pipe.to("cuda")
|
82 |
return pipe
|
83 |
|