Upload 2 files
Browse files
- handler.py +6 -6
- requirements.txt +1 -2
handler.py
CHANGED
@@ -34,9 +34,9 @@ def load_pipeline_compile(repo_id: str, dtype: torch.dtype) -> Any:
|
|
34 |
pipe.transformer.fuse_qkv_projections()
|
35 |
pipe.vae.fuse_qkv_projections()
|
36 |
pipe.transformer.to(memory_format=torch.channels_last)
|
37 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False
|
38 |
pipe.vae.to(memory_format=torch.channels_last)
|
39 |
-
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False
|
40 |
pipe.to("cuda")
|
41 |
return pipe
|
42 |
|
@@ -45,9 +45,9 @@ def load_pipeline_autoquant(repo_id: str, dtype: torch.dtype) -> Any:
|
|
45 |
pipe.transformer.fuse_qkv_projections()
|
46 |
pipe.vae.fuse_qkv_projections()
|
47 |
pipe.transformer.to(memory_format=torch.channels_last)
|
48 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True
|
49 |
pipe.vae.to(memory_format=torch.channels_last)
|
50 |
-
pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=True
|
51 |
pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)
|
52 |
pipe.vae = autoquant(pipe.vae, error_on_unseen=False)
|
53 |
pipe.to("cuda")
|
@@ -75,9 +75,9 @@ def load_pipeline_turbo_compile(repo_id: str, dtype: torch.dtype) -> Any:
|
|
75 |
quantize_(pipe.transformer, int8_dynamic_activation_int8_weight(), device="cuda")
|
76 |
quantize_(pipe.vae, int8_dynamic_activation_int8_weight(), device="cuda")
|
77 |
pipe.transformer.to(memory_format=torch.channels_last)
|
78 |
-
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False
|
79 |
pipe.vae.to(memory_format=torch.channels_last)
|
80 |
-
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False
|
81 |
pipe.to("cuda")
|
82 |
return pipe
|
83 |
|
|
|
34 |
pipe.transformer.fuse_qkv_projections()
|
35 |
pipe.vae.fuse_qkv_projections()
|
36 |
pipe.transformer.to(memory_format=torch.channels_last)
|
37 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False)
|
38 |
pipe.vae.to(memory_format=torch.channels_last)
|
39 |
+
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False)
|
40 |
pipe.to("cuda")
|
41 |
return pipe
|
42 |
|
|
|
45 |
pipe.transformer.fuse_qkv_projections()
|
46 |
pipe.vae.fuse_qkv_projections()
|
47 |
pipe.transformer.to(memory_format=torch.channels_last)
|
48 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
49 |
pipe.vae.to(memory_format=torch.channels_last)
|
50 |
+
pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=True)
|
51 |
pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)
|
52 |
pipe.vae = autoquant(pipe.vae, error_on_unseen=False)
|
53 |
pipe.to("cuda")
|
|
|
75 |
quantize_(pipe.transformer, int8_dynamic_activation_int8_weight(), device="cuda")
|
76 |
quantize_(pipe.vae, int8_dynamic_activation_int8_weight(), device="cuda")
|
77 |
pipe.transformer.to(memory_format=torch.channels_last)
|
78 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False)
|
79 |
pipe.vae.to(memory_format=torch.channels_last)
|
80 |
+
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False)
|
81 |
pipe.to("cuda")
|
82 |
return pipe
|
83 |
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
huggingface_hub
|
2 |
-
torch
|
3 |
torchvision
|
4 |
torchao==0.9.0
|
5 |
diffusers==0.32.2
|
@@ -11,5 +11,4 @@ scipy
|
|
11 |
Pillow
|
12 |
sentencepiece
|
13 |
protobuf
|
14 |
-
pytorch-lightning
|
15 |
triton
|
|
|
1 |
huggingface_hub
|
2 |
+
torch>=2.4.0
|
3 |
torchvision
|
4 |
torchao==0.9.0
|
5 |
diffusers==0.32.2
|
|
|
11 |
Pillow
|
12 |
sentencepiece
|
13 |
protobuf
|
|
|
14 |
triton
|