English
Inference Endpoints
John6666 committed on
Commit
4bdfc27
·
verified ·
1 Parent(s): b62093b

Upload 2 files

Browse files
Files changed (2) hide show
  1. handler.py +4 -4
  2. requirements.txt +1 -0
handler.py CHANGED
@@ -24,10 +24,10 @@ print("device name:", torch.cuda.get_device_name())
24
  print("device capability:", torch.cuda.get_device_capability())
25
 
26
  IS_TURBO = False
27
- IS_4BIT = True
28
  IS_PARA = True
29
- IS_LVRAM = True
30
- IS_COMPILE = False
31
  IS_AUTOQ = False
32
  IS_CC90 = True if torch.cuda.get_device_capability() >= (9, 0) else False
33
  IS_CC89 = True if torch.cuda.get_device_capability() >= (8, 9) else False
@@ -160,7 +160,7 @@ def load_pipeline_opt(repo_id: str, dtype: torch.dtype) -> Any:
160
  else: quantize_(vae, weight, device="cuda")
161
  vae.to(memory_format=torch.channels_last)
162
  vae = torch.compile(vae, mode="max-autotune", fullgraph=True)
163
- pipe = FluxPipeline.from_pretrained(repo_id, transformer=None, vae=None, text_encoder_2=load_te2(repo_id, dtype), torch_dtype=dtype, quantization_config=quantization_config)
164
  pipe.transformer = transformer
165
  pipe.vae = vae
166
  return pipe
 
24
  print("device capability:", torch.cuda.get_device_capability())
25
 
26
  IS_TURBO = False
27
+ IS_4BIT = False
28
  IS_PARA = True
29
+ IS_LVRAM = False
30
+ IS_COMPILE = True
31
  IS_AUTOQ = False
32
  IS_CC90 = True if torch.cuda.get_device_capability() >= (9, 0) else False
33
  IS_CC89 = True if torch.cuda.get_device_capability() >= (8, 9) else False
 
160
  else: quantize_(vae, weight, device="cuda")
161
  vae.to(memory_format=torch.channels_last)
162
  vae = torch.compile(vae, mode="max-autotune", fullgraph=True)
163
+ pipe = FluxPipeline.from_pretrained(repo_id, transformer=None, vae=None, torch_dtype=dtype, quantization_config=quantization_config)
164
  pipe.transformer = transformer
165
  pipe.vae = vae
166
  return pipe
requirements.txt CHANGED
@@ -15,6 +15,7 @@ sentencepiece
15
  protobuf
16
  triton
17
  gemlite
 
18
  para-attn
19
  bitsandbytes
20
  optimum-quanto
 
15
  protobuf
16
  triton
17
  gemlite
18
+ tabulate
19
  para-attn
20
  bitsandbytes
21
  optimum-quanto