"""Generate one image with FLUX.1-dev plus a LoRA and save it to ``image.png``.

The script loads the base pipeline in bfloat16, applies the LoRA weights,
optionally quantizes the large sub-models with optimum-quanto, enables model
CPU offload, runs a single 1024x1024 generation and writes the result to disk.
"""

import torch
from diffusers import FluxPipeline

# Quantization is slow but necessary if VRAM is limited to 24 GB.
# Set to False to skip it.
QUANTIZE_WEIGHTS = True

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16,
)
pipe.load_lora_weights(
    "mikaelh/flux-sanna-marin-lora-v0.3-fp8",
    weight_name="pytorch_lora_weights.safetensors",
)

if QUANTIZE_WEIGHTS:
    # Imported lazily so optimum-quanto is only required when quantizing.
    # NOTE(review): the original also imported qint8 (unused) — presumably an
    # alternative weight type; import it here if you want to switch.
    from optimum.quanto import freeze, qfloat8, quantize

    weight_quant = qfloat8

    # Quantize transformer and text encoders similar to SimpleTuner.
    # Each component is quantized and then frozen, in this order.
    for component in (pipe.transformer, pipe.text_encoder, pipe.text_encoder_2):
        quantize(component, weights=weight_quant)
        freeze(component)

pipe.enable_model_cpu_offload()

prompt = "closeup of sanna marin"
out = pipe(
    prompt=prompt,
    guidance_scale=3.5,
    height=1024,
    width=1024,
    num_inference_steps=20,
).images[0]
out.save("image.png")