Commit 6bc302d (verified), committed by John6666
Parent(s): 6a22863

Upload 3 files

Files changed (2):
  1. handler.py +9 -4
  2. requirements.txt +7 -3
handler.py CHANGED
@@ -14,9 +14,10 @@ if IS_COMPILE:
 #from huggingface_inference_toolkit.logging import logger
 
 def compile_pipeline(pipe) -> Any:
-    pipe.transformer.fuse_qkv_projections()
     pipe.transformer.to(memory_format=torch.channels_last)
     pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
+    pipe.vae.to(memory_format=torch.channels_last)
+    pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
     return pipe
 
 class EndpointHandler:
@@ -24,14 +25,15 @@ class EndpointHandler:
         repo_id = "camenduru/FLUX.1-dev-diffusers"
         #repo_id = "NoMoreCopyright/FLUX.1-dev-test"
         dtype = torch.bfloat16
-        quantization_config = TorchAoConfig("int8wo")
+        quantization_config = TorchAoConfig("int8dq")
         vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
         #transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
         self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
+        self.pipeline.transformer.fuse_qkv_projections()
+        self.pipeline.vae.fuse_qkv_projections()
         if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
         self.pipeline.to("cuda")
 
-    #@torch.inference_mode()
     def __call__(self, data: Dict[str, Any]) -> Image.Image:
         #logger.info(f"Received incoming request with {data=}")
 
@@ -63,4 +65,7 @@ class EndpointHandler:
             guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
             generator=generator,
-        ).images[0]
+            output_type="pil",
+        ).images[0]
+
+
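For reviewers who want to exercise the updated handler outside of Inference Endpoints, a minimal local smoke test could look like the sketch below. The constructor signature and the payload keys ("inputs", "parameters") are assumptions based on the usual huggingface-inference-toolkit handler convention; neither appears in this diff, so adjust them to match the real __call__ parsing.

# Sketch of a local smoke test for the updated EndpointHandler.
# Assumptions (not shown in this diff): EndpointHandler() can be constructed
# without arguments, and the payload uses the conventional "inputs" /
# "parameters" keys.
from handler import EndpointHandler

handler = EndpointHandler()  # hypothetical: __init__ signature is not part of this diff

payload = {
    "inputs": "a cat wearing a spacesuit, studio lighting",  # hypothetical key
    "parameters": {                                           # hypothetical key
        "guidance_scale": 3.5,
        "num_inference_steps": 28,
    },
}

image = handler(payload)      # __call__ returns a PIL.Image.Image
image.save("smoke_test.png")  # valid because the pipeline is called with output_type="pil"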
requirements.txt CHANGED
@@ -1,11 +1,15 @@
 huggingface_hub
-torch
+torch==2.4.0
 torchvision
-torchao
+torchao==0.9.0
 diffusers
 peft
 accelerate
 transformers
 numpy
 scipy
-Pillow
+Pillow
+sentencepiece
+protobuf
+pytorch-lightning
+triton
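Since torch and torchao are now pinned, a small start-up check can confirm that the endpoint image actually resolved to those versions. This is only a sketch; the version strings are taken directly from the requirements.txt above.

# Sketch: verify the pins from requirements.txt at container start-up.
# Uses importlib.metadata from the standard library, so no extra dependency.
from importlib.metadata import version, PackageNotFoundError

PINS = {"torch": "2.4.0", "torchao": "0.9.0"}  # taken from requirements.txt above

for pkg, pinned in PINS.items():
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        raise RuntimeError(f"{pkg} is pinned in requirements.txt but not installed")
    # startswith() tolerates local version suffixes such as "2.4.0+cu121"
    if not installed.startswith(pinned):
        raise RuntimeError(f"{pkg}=={installed} installed, expected {pinned}")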