Upload handler.py
Browse files - handler.py (+4 -6)
handler.py
CHANGED
@@ -4,10 +4,8 @@ from typing import Any, Dict
|
|
4 |
from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, TorchAoConfig
|
5 |
from PIL import Image
|
6 |
import torch
|
7 |
-
from accelerate import PartialState
|
8 |
-
distributed_state = PartialState()
|
9 |
|
10 |
-
IS_COMPILE =
|
11 |
|
12 |
if IS_COMPILE:
|
13 |
import torch._dynamo
|
@@ -16,8 +14,10 @@ if IS_COMPILE:
|
|
16 |
#from huggingface_inference_toolkit.logging import logger
|
17 |
|
18 |
def compile_pipeline(pipe) -> Any:
|
|
|
19 |
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
20 |
-
pipe.vae
|
|
|
21 |
return pipe
|
22 |
|
23 |
class EndpointHandler:
|
@@ -30,9 +30,7 @@ class EndpointHandler:
|
|
30 |
#transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
|
31 |
self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
|
32 |
self.pipeline.transformer.fuse_qkv_projections()
|
33 |
-
self.pipeline.transformer.to(memory_format=torch.channels_last)
|
34 |
self.pipeline.vae.fuse_qkv_projections()
|
35 |
-
self.pipeline.vae.to(memory_format=torch.channels_last)
|
36 |
if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
|
37 |
self.pipeline.to(distributed_state.device)
|
38 |
|
|
|
4 |
from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, TorchAoConfig
|
5 |
from PIL import Image
|
6 |
import torch
|
|
|
|
|
7 |
|
8 |
+
IS_COMPILE = True
|
9 |
|
10 |
if IS_COMPILE:
|
11 |
import torch._dynamo
|
|
|
14 |
#from huggingface_inference_toolkit.logging import logger
|
15 |
|
16 |
def compile_pipeline(pipe) -> Any:
    """Compile the pipeline's transformer with ``torch.compile``.

    The transformer is first converted to the ``channels_last`` memory
    format and is then replaced on ``pipe`` by its compiled wrapper. The
    VAE is not touched here: its conversion and compilation are currently
    commented out.

    Args:
        pipe: Object exposing a ``transformer`` attribute that supports
            ``.to(memory_format=...)`` and can be passed to
            ``torch.compile``.

    Returns:
        The same ``pipe`` object, mutated in place.
    """
    pipe.transformer.to(memory_format=torch.channels_last)
    pipe.transformer = torch.compile(
        pipe.transformer,
        mode="reduce-overhead",
        fullgraph=False,
        dynamic=False,
        backend="inductor",
    )
    # VAE path is currently disabled; original statements kept for reference:
    # pipe.vae.to(memory_format=torch.channels_last)
    # pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
    return pipe
|
22 |
|
23 |
class EndpointHandler:
|
|
|
30 |
#transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
|
31 |
self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
|
32 |
self.pipeline.transformer.fuse_qkv_projections()
|
|
|
33 |
self.pipeline.vae.fuse_qkv_projections()
|
|
|
34 |
if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
|
35 |
self.pipeline.to(distributed_state.device)
|
36 |
|