# unik-ml / routers / inference / stable_diffusion.py
# (Provenance: Hugging Face file viewer — author RoniFinTech, commit 6a0af53,
#  1.97 kB; viewer chrome "raw / history / blame / No virus" removed so the
#  file is valid Python.)
# load both base & refiner
from io import BytesIO

import torch
from diffusers import DiffusionPipeline
from fastapi import APIRouter
from fastapi.responses import Response, StreamingResponse

from cache.local_cache import ttl_cache
from config import settings
router = APIRouter()
# Base SDXL text-to-image pipeline, loaded once at import time in half
# precision (the "fp16" checkpoint variant) to reduce GPU memory; safetensors
# avoids pickle-based weight loading.
base = DiffusionPipeline.from_pretrained(
    settings.base_sd_model, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
)
base.to("cuda")
# base.enable_model_cpu_offload()
# Attention slicing lowers peak VRAM during attention at some speed cost.
base.enable_attention_slicing()
# The refiner reuses the base pipeline's second text encoder and VAE, so
# those weights are loaded only once. Order matters: `base` must be built
# before `refiner`.
refiner = DiffusionPipeline.from_pretrained(
    settings.refiner_sd_model,
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
refiner.to("cuda")
# refiner.enable_model_cpu_offload()
refiner.enable_attention_slicing()
@router.get("/generate")
@ttl_cache(key_name='prompt', ttl_secs=20)
async def generate(prompt: str):
    """
    Generate a PNG image for *prompt* with the SDXL base + refiner pair.

    The base pipeline runs the first 80% of the denoising steps and hands
    its latents to the refiner, which completes the remaining 20%
    (the ensemble-of-experts SDXL recipe).

    Returns a ``Response`` whose body is a plain ``bytes`` object. This is
    deliberate: the result is cached by ``ttl_cache`` for 20 s, and a
    ``StreamingResponse`` wrapping a ``BytesIO`` is exhausted after the
    first send — every cache hit within the TTL would then return an
    empty body. A bytes-backed ``Response`` can be re-sent safely.
    """
    # 80/20 split of the denoising schedule between the two experts.
    n_steps = 40
    high_noise_frac = 0.8
    negative = "disfigured, ugly, bad, immature, cartoon, anime, 3d, painting, b&w, sketch, blurry, deformed, bad anatomy, poorly drawn face, mutation, multiple people."
    prompt = f"single image. single model. {prompt}. zoomed in. full-body. real person. realistic. 4k. best quality."
    print(prompt)
    # Base pass: stop at `high_noise_frac` and emit latents for the refiner.
    image = base(
        prompt=prompt,
        negative_prompt=negative,
        num_inference_steps=n_steps,
        denoising_end=high_noise_frac,
        output_type="latent",
    ).images[0]
    # Refiner pass: resume denoising from the same fraction on those latents.
    final_image = refiner(
        prompt=prompt,
        negative_prompt=negative,
        num_inference_steps=n_steps,
        denoising_start=high_noise_frac,
        image=image,
    ).images[0]
    buffer = BytesIO()
    final_image.save(buffer, format="PNG")
    # Return raw bytes (not a one-shot stream) so cached responses replay.
    return Response(content=buffer.getvalue(), media_type="image/png")