Upload 3 files
Browse files
Changed files:
- handler.py +9 -4
- requirements.txt +7 -3
handler.py
CHANGED
@@ -14,9 +14,10 @@ if IS_COMPILE:
|
|
14 |
#from huggingface_inference_toolkit.logging import logger
|
15 |
|
16 |
def compile_pipeline(pipe) -> Any:
|
17 |
-
pipe.transformer.fuse_qkv_projections()
|
18 |
pipe.transformer.to(memory_format=torch.channels_last)
|
19 |
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
|
|
|
|
20 |
return pipe
|
21 |
|
22 |
class EndpointHandler:
|
@@ -24,14 +25,15 @@ class EndpointHandler:
|
|
24 |
repo_id = "camenduru/FLUX.1-dev-diffusers"
|
25 |
#repo_id = "NoMoreCopyright/FLUX.1-dev-test"
|
26 |
dtype = torch.bfloat16
|
27 |
-
quantization_config = TorchAoConfig("  [removed line truncated during extraction — original argument not recoverable from this page; replaced below by TorchAoConfig("int8dq")]
|
28 |
vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
|
29 |
#transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
|
30 |
self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
|
|
|
|
|
31 |
if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
|
32 |
self.pipeline.to("cuda")
|
33 |
|
34 |
-
#@torch.inference_mode()
|
35 |
def __call__(self, data: Dict[str, Any]) -> Image.Image:
|
36 |
#logger.info(f"Received incoming request with {data=}")
|
37 |
|
@@ -63,4 +65,7 @@ class EndpointHandler:
|
|
63 |
guidance_scale=guidance_scale,
|
64 |
num_inference_steps=num_inference_steps,
|
65 |
generator=generator,
|
66 |
-
|
|
|
|
|
|
|
|
14 |
#from huggingface_inference_toolkit.logging import logger
|
15 |
|
16 |
def compile_pipeline(pipe) -> Any:
|
|
|
17 |
pipe.transformer.to(memory_format=torch.channels_last)
|
18 |
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
19 |
+
pipe.vae.to(memory_format=torch.channels_last)
|
20 |
+
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
21 |
return pipe
|
22 |
|
23 |
class EndpointHandler:
|
|
|
25 |
repo_id = "camenduru/FLUX.1-dev-diffusers"
|
26 |
#repo_id = "NoMoreCopyright/FLUX.1-dev-test"
|
27 |
dtype = torch.bfloat16
|
28 |
+
quantization_config = TorchAoConfig("int8dq")
|
29 |
vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
|
30 |
#transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
|
31 |
self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
|
32 |
+
self.pipeline.transformer.fuse_qkv_projections()
|
33 |
+
self.pipeline.vae.fuse_qkv_projections()
|
34 |
if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
|
35 |
self.pipeline.to("cuda")
|
36 |
|
|
|
37 |
def __call__(self, data: Dict[str, Any]) -> Image.Image:
|
38 |
#logger.info(f"Received incoming request with {data=}")
|
39 |
|
|
|
65 |
guidance_scale=guidance_scale,
|
66 |
num_inference_steps=num_inference_steps,
|
67 |
generator=generator,
|
68 |
+
output_type="pil",
|
69 |
+
).images[0]
|
70 |
+
|
71 |
+
|
requirements.txt
CHANGED
@@ -1,11 +1,15 @@
|
|
1 |
huggingface_hub
|
2 |
-
torch
|
3 |
torchvision
|
4 |
-
torchao
|
5 |
diffusers
|
6 |
peft
|
7 |
accelerate
|
8 |
transformers
|
9 |
numpy
|
10 |
scipy
|
11 |
-
Pillow
|
|
|
|
|
|
|
|
|
|
1 |
huggingface_hub
|
2 |
+
torch==2.4.0
|
3 |
torchvision
|
4 |
+
torchao==0.9.0
|
5 |
diffusers
|
6 |
peft
|
7 |
accelerate
|
8 |
transformers
|
9 |
numpy
|
10 |
scipy
|
11 |
+
Pillow
|
12 |
+
sentencepiece
|
13 |
+
protobuf
|
14 |
+
pytorch-lightning
|
15 |
+
triton
|