NoMoreCopyrightOrg
/

flux-dev

Inference Endpoints

Model card Files and versions Community

John6666 committed on 6 days ago

Commit

fecd4a4

·

verified ·

1 Parent(s): f28bd15

Upload handler.py

Files changed (1) hide show

handler.py +4 -2

handler.py CHANGED Viewed

@@ -29,8 +29,10 @@ class EndpointHandler:
         vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
         #transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
         self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
-        self.pipeline.transformer.fuse_qkv_projections().to(memory_format=torch.channels_last)
-        self.pipeline.vae.fuse_qkv_projections().to(memory_format=torch.channels_last)
         if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
         self.pipeline.to(distributed_state.device)

         vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
         #transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
         self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
+        self.pipeline.transformer.fuse_qkv_projections()
+        self.pipeline.transformer.to(memory_format=torch.channels_last)
+        self.pipeline.vae.fuse_qkv_projections()
+        self.pipeline.vae.to(memory_format=torch.channels_last)
         if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
         self.pipeline.to(distributed_state.device)