English
Inference Endpoints
John6666 committed on
Commit
7ec0924
·
verified ·
1 Parent(s): f992f08

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +7 -7
handler.py CHANGED
@@ -9,7 +9,7 @@ import torch
9
  from torchao.quantization import quantize_, autoquant, int8_dynamic_activation_int8_weight
10
  from huggingface_hub import hf_hub_download
11
 
12
- IS_COMPILE = False
13
  IS_TURBO = False
14
 
15
  if IS_COMPILE:
@@ -34,9 +34,9 @@ def load_pipeline_compile(repo_id: str, dtype: torch.dtype) -> Any:
34
  pipe.transformer.fuse_qkv_projections()
35
  pipe.vae.fuse_qkv_projections()
36
  pipe.transformer.to(memory_format=torch.channels_last)
37
- pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
38
  pipe.vae.to(memory_format=torch.channels_last)
39
- pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
40
  pipe.to("cuda")
41
  return pipe
42
 
@@ -45,9 +45,9 @@ def load_pipeline_autoquant(repo_id: str, dtype: torch.dtype) -> Any:
45
  pipe.transformer.fuse_qkv_projections()
46
  pipe.vae.fuse_qkv_projections()
47
  pipe.transformer.to(memory_format=torch.channels_last)
48
- pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
49
  pipe.vae.to(memory_format=torch.channels_last)
50
- pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=True)
51
  pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)
52
  pipe.vae = autoquant(pipe.vae, error_on_unseen=False)
53
  pipe.to("cuda")
@@ -75,9 +75,9 @@ def load_pipeline_turbo_compile(repo_id: str, dtype: torch.dtype) -> Any:
75
  quantize_(pipe.transformer, int8_dynamic_activation_int8_weight(), device="cuda")
76
  quantize_(pipe.vae, int8_dynamic_activation_int8_weight(), device="cuda")
77
  pipe.transformer.to(memory_format=torch.channels_last)
78
- pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
79
  pipe.vae.to(memory_format=torch.channels_last)
80
- pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
81
  pipe.to("cuda")
82
  return pipe
83
 
 
9
  from torchao.quantization import quantize_, autoquant, int8_dynamic_activation_int8_weight
10
  from huggingface_hub import hf_hub_download
11
 
12
+ IS_COMPILE = True
13
  IS_TURBO = False
14
 
15
  if IS_COMPILE:
 
34
  pipe.transformer.fuse_qkv_projections()
35
  pipe.vae.fuse_qkv_projections()
36
  pipe.transformer.to(memory_format=torch.channels_last)
37
+ pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="eager")
38
  pipe.vae.to(memory_format=torch.channels_last)
39
+ pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="eager")
40
  pipe.to("cuda")
41
  return pipe
42
 
 
45
  pipe.transformer.fuse_qkv_projections()
46
  pipe.vae.fuse_qkv_projections()
47
  pipe.transformer.to(memory_format=torch.channels_last)
48
+ pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True, backend="eager")
49
  pipe.vae.to(memory_format=torch.channels_last)
50
+ pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=True, backend="eager")
51
  pipe.transformer = autoquant(pipe.transformer, error_on_unseen=False)
52
  pipe.vae = autoquant(pipe.vae, error_on_unseen=False)
53
  pipe.to("cuda")
 
75
  quantize_(pipe.transformer, int8_dynamic_activation_int8_weight(), device="cuda")
76
  quantize_(pipe.vae, int8_dynamic_activation_int8_weight(), device="cuda")
77
  pipe.transformer.to(memory_format=torch.channels_last)
78
+ pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="eager")
79
  pipe.vae.to(memory_format=torch.channels_last)
80
+ pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="eager")
81
  pipe.to("cuda")
82
  return pipe
83