English
Inference Endpoints
John6666 committed on
Commit
b7d193e
·
verified ·
1 Parent(s): 6551d57

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +9 -5
handler.py CHANGED
@@ -58,11 +58,15 @@ def load_pipeline_stable(repo_id: str, dtype: torch.dtype) -> Any:
58
  return pipe
59
 
60
  def load_pipeline_lowvram(repo_id: str, dtype: torch.dtype) -> Any:
61
- int4_config = TorchAoConfig("int4dq")
62
- float8_config = TorchAoConfig("float8dq")
63
- vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype, quantization_config=float8_config)
64
- transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=float8_config)
65
- pipe = FluxPipeline.from_pretrained(repo_id, vae=vae, transformer=transformer, text_encoder_2=load_te2(repo_id, dtype), torch_dtype=dtype, quantization_config=int4_config)
 
 
 
 
66
  #pipe.transformer.fuse_qkv_projections()
67
  #pipe.vae.fuse_qkv_projections()
68
  pipe.to("cuda")
 
58
  return pipe
59
 
60
  def load_pipeline_lowvram(repo_id: str, dtype: torch.dtype) -> Any:
61
+ #int4_config = TorchAoConfig("int4dq")
62
+ #float8_config = TorchAoConfig("float8dq")
63
+ vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
64
+ quantize_(vae, float8_dynamic_activation_float8_weight(granularity=PerRow()), device="cuda")
65
+ transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype)
66
+ quantize_(transformer, float8_dynamic_activation_float8_weight(granularity=PerRow()), device="cuda")
67
+ pipe = FluxPipeline.from_pretrained(repo_id, vae=None, transformer=None, text_encoder_2=load_te2(repo_id, dtype), torch_dtype=dtype, quantization_config=int4_config)
68
+ pipe.transformer = transformer
69
+ pipe.vae = vae
70
  #pipe.transformer.fuse_qkv_projections()
71
  #pipe.vae.fuse_qkv_projections()
72
  pipe.to("cuda")