diff --git a/README.md b/README.md index 7ce921d96f844bd3bd6d9860c3878114884417eb..efd71e7626b2693b6dbd55047a77f152d5f11dc0 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,6 @@ --- -title: Fastsdtest -emoji: 🌍 -colorFrom: green -colorTo: gray +title: fastsdtest +app_file: app.py sdk: gradio sdk_version: 4.31.2 -app_file: app.py -pinned: false --- - -Check out the configuration reference at https://huggingface.co./docs/hub/spaces-config-reference diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..43e340da148e3973f89d5b102eb8350ad81dc886 --- /dev/null +++ b/app.py @@ -0,0 +1,502 @@ +import json +from argparse import ArgumentParser + +import constants +from backend.controlnet import controlnet_settings_from_dict +from backend.models.gen_images import ImageFormat +from backend.models.lcmdiffusion_setting import DiffusionTask +from backend.upscale.tiled_upscale import generate_upscaled_image +from constants import APP_VERSION, DEVICE +from frontend.webui.image_variations_ui import generate_image_variations +from models.interface_types import InterfaceType +from paths import FastStableDiffusionPaths +from PIL import Image +from state import get_context, get_settings +from utils import show_system_info +from backend.device import get_device_name + +parser = ArgumentParser(description=f"FAST SD CPU {constants.APP_VERSION}") +parser.add_argument( + "-s", + "--share", + action="store_true", + help="Create sharable link(Web UI)", + required=False, +) +group = parser.add_mutually_exclusive_group(required=False) +group.add_argument( + "-g", + "--gui", + action="store_true", + help="Start desktop GUI", +) +group.add_argument( + "-w", + "--webui", + action="store_true", + help="Start Web UI", +) +group.add_argument( + "-r", + "--realtime", + action="store_true", + help="Start realtime inference UI(experimental)", +) +group.add_argument( + "-v", + "--version", + action="store_true", + help="Version", +) + +parser.add_argument( + "-b", + "--benchmark", + action="store_true", + help="Run inference benchmark on the selected device", +) +parser.add_argument( + "--lcm_model_id", + type=str, + help="Model ID or path,Default stabilityai/sd-turbo", + default="stabilityai/sd-turbo", +) +parser.add_argument( + "--openvino_lcm_model_id", + type=str, + help="OpenVINO Model ID or path,Default rupeshs/sd-turbo-openvino", + default="rupeshs/sd-turbo-openvino", +) +parser.add_argument( + "--prompt", + type=str, + help="Describe the image you want to generate", + default="", +) +parser.add_argument( + "--negative_prompt", + type=str, + help="Describe what you want to exclude from the generation", + default="", +) +parser.add_argument( + "--image_height", + type=int, + help="Height of the image", + default=512, +) +parser.add_argument( + "--image_width", + type=int, + help="Width of the image", + default=512, +) +parser.add_argument( + "--inference_steps", + type=int, + help="Number of steps,default : 1", + default=1, +) +parser.add_argument( + "--guidance_scale", + type=float, + help="Guidance scale,default : 1.0", + default=1.0, +) + +parser.add_argument( + "--number_of_images", + type=int, + help="Number of images to generate ,default : 1", + default=1, +) +parser.add_argument( + "--seed", + type=int, + help="Seed,default : -1 (disabled) ", + default=-1, +) +parser.add_argument( + "--use_openvino", + action="store_true", + 
help="Use OpenVINO model", +) + +parser.add_argument( + "--use_offline_model", + action="store_true", + help="Use offline model", +) +parser.add_argument( + "--use_safety_checker", + action="store_true", + help="Use safety checker", +) +parser.add_argument( + "--use_lcm_lora", + action="store_true", + help="Use LCM-LoRA", +) +parser.add_argument( + "--base_model_id", + type=str, + help="LCM LoRA base model ID,Default Lykon/dreamshaper-8", + default="Lykon/dreamshaper-8", +) +parser.add_argument( + "--lcm_lora_id", + type=str, + help="LCM LoRA model ID,Default latent-consistency/lcm-lora-sdv1-5", + default="latent-consistency/lcm-lora-sdv1-5", +) +parser.add_argument( + "-i", + "--interactive", + action="store_true", + help="Interactive CLI mode", +) +parser.add_argument( + "-t", + "--use_tiny_auto_encoder", + action="store_true", + help="Use tiny auto encoder for SD (TAESD)", +) +parser.add_argument( + "-f", + "--file", + type=str, + help="Input image for img2img mode", + default="", +) +parser.add_argument( + "--img2img", + action="store_true", + help="img2img mode; requires input file via -f argument", +) +parser.add_argument( + "--batch_count", + type=int, + help="Number of sequential generations", + default=1, +) +parser.add_argument( + "--strength", + type=float, + help="Denoising strength for img2img and Image variations", + default=0.3, +) +parser.add_argument( + "--sdupscale", + action="store_true", + help="Tiled SD upscale,works only for the resolution 512x512,(2x upscale)", +) +parser.add_argument( + "--upscale", + action="store_true", + help="EDSR SD upscale ", +) +parser.add_argument( + "--custom_settings", + type=str, + help="JSON file containing custom generation settings", + default=None, +) +parser.add_argument( + "--usejpeg", + action="store_true", + help="Images will be saved as JPEG format", +) +parser.add_argument( + "--noimagesave", + action="store_true", + help="Disable image saving", +) +parser.add_argument( + "--lora", + type=str, + help="LoRA model full path e.g D:\lora_models\CuteCartoon15V-LiberteRedmodModel-Cartoon-CuteCartoonAF.safetensors", + default=None, +) +parser.add_argument( + "--lora_weight", + type=float, + help="LoRA adapter weight [0 to 1.0]", + default=0.5, +) + +args = parser.parse_args() + +if args.version: + print(APP_VERSION) + exit() + +# parser.print_help() +show_system_info() +print(f"Using device : {constants.DEVICE}") + +if args.webui: + app_settings = get_settings() +else: + app_settings = get_settings() + +print(f"Found {len(app_settings.lcm_models)} LCM models in config/lcm-models.txt") +print( + f"Found {len(app_settings.stable_diffsuion_models)} stable diffusion models in config/stable-diffusion-models.txt" +) +print( + f"Found {len(app_settings.lcm_lora_models)} LCM-LoRA models in config/lcm-lora-models.txt" +) +print( + f"Found {len(app_settings.openvino_lcm_models)} OpenVINO LCM models in config/openvino-lcm-models.txt" +) + +if args.noimagesave: + app_settings.settings.generated_images.save_image = False +else: + app_settings.settings.generated_images.save_image = True + +if not args.realtime: + # To minimize realtime mode dependencies + from backend.upscale.upscaler import upscale_image + from frontend.cli_interactive import interactive_mode + +if args.gui: + from frontend.gui.ui import start_gui + + print("Starting desktop GUI mode(Qt)") + start_gui( + [], + app_settings, + ) +elif args.webui: + from frontend.webui.ui import start_webui + + print("Starting web UI mode") + start_webui( + args.share, + ) +elif args.realtime: + from 
frontend.webui.realtime_ui import start_realtime_text_to_image + + print("Starting realtime text to image(EXPERIMENTAL)") + start_realtime_text_to_image(args.share) +else: + context = get_context(InterfaceType.CLI) + config = app_settings.settings + + if args.use_openvino: + config.lcm_diffusion_setting.openvino_lcm_model_id = args.openvino_lcm_model_id + else: + config.lcm_diffusion_setting.lcm_model_id = args.lcm_model_id + + config.lcm_diffusion_setting.prompt = args.prompt + config.lcm_diffusion_setting.negative_prompt = args.negative_prompt + config.lcm_diffusion_setting.image_height = args.image_height + config.lcm_diffusion_setting.image_width = args.image_width + config.lcm_diffusion_setting.guidance_scale = args.guidance_scale + config.lcm_diffusion_setting.number_of_images = args.number_of_images + config.lcm_diffusion_setting.inference_steps = args.inference_steps + config.lcm_diffusion_setting.strength = args.strength + config.lcm_diffusion_setting.seed = args.seed + config.lcm_diffusion_setting.use_openvino = args.use_openvino + config.lcm_diffusion_setting.use_tiny_auto_encoder = args.use_tiny_auto_encoder + config.lcm_diffusion_setting.use_lcm_lora = args.use_lcm_lora + config.lcm_diffusion_setting.lcm_lora.base_model_id = args.base_model_id + config.lcm_diffusion_setting.lcm_lora.lcm_lora_id = args.lcm_lora_id + config.lcm_diffusion_setting.diffusion_task = DiffusionTask.text_to_image.value + config.lcm_diffusion_setting.lora.enabled = False + config.lcm_diffusion_setting.lora.path = args.lora + config.lcm_diffusion_setting.lora.weight = args.lora_weight + config.lcm_diffusion_setting.lora.fuse = True + if config.lcm_diffusion_setting.lora.path: + config.lcm_diffusion_setting.lora.enabled = True + if args.usejpeg: + config.generated_images.format = ImageFormat.JPEG.value.upper() + if args.seed > -1: + config.lcm_diffusion_setting.use_seed = True + else: + config.lcm_diffusion_setting.use_seed = False + config.lcm_diffusion_setting.use_offline_model = args.use_offline_model + config.lcm_diffusion_setting.use_safety_checker = args.use_safety_checker + + # Read custom settings from JSON file + custom_settings = {} + if args.custom_settings: + with open(args.custom_settings) as f: + custom_settings = json.load(f) + + # Basic ControlNet settings; if ControlNet is enabled, an image is + # required even in txt2img mode + config.lcm_diffusion_setting.controlnet = None + controlnet_settings_from_dict( + config.lcm_diffusion_setting, + custom_settings, + ) + + # Interactive mode + if args.interactive: + # wrapper(interactive_mode, config, context) + config.lcm_diffusion_setting.lora.fuse = False + interactive_mode(config, context) + + # Start of non-interactive CLI image generation + if args.img2img and args.file != "": + config.lcm_diffusion_setting.init_image = Image.open(args.file) + config.lcm_diffusion_setting.diffusion_task = DiffusionTask.image_to_image.value + elif args.img2img and args.file == "": + print("Error : You need to specify a file in img2img mode") + exit() + elif args.upscale and args.file == "" and args.custom_settings == None: + print("Error : You need to specify a file in SD upscale mode") + exit() + elif ( + args.prompt == "" + and args.file == "" + and args.custom_settings == None + and not args.benchmark + ): + print("Error : You need to provide a prompt") + exit() + + if args.upscale: + # image = Image.open(args.file) + output_path = FastStableDiffusionPaths.get_upscale_filepath( + args.file, + 2, + config.generated_images.format, + ) + result = 
upscale_image( + context, + args.file, + output_path, + 2, + ) + # Perform Tiled SD upscale (EXPERIMENTAL) + elif args.sdupscale: + if args.use_openvino: + config.lcm_diffusion_setting.strength = 0.3 + upscale_settings = None + if custom_settings != {}: + upscale_settings = custom_settings + filepath = args.file + output_format = config.generated_images.format + if upscale_settings: + filepath = upscale_settings["source_file"] + output_format = upscale_settings["output_format"].upper() + output_path = FastStableDiffusionPaths.get_upscale_filepath( + filepath, + 2, + output_format, + ) + + generate_upscaled_image( + config, + filepath, + config.lcm_diffusion_setting.strength, + upscale_settings=upscale_settings, + context=context, + tile_overlap=32 if config.lcm_diffusion_setting.use_openvino else 16, + output_path=output_path, + image_format=output_format, + ) + exit() + # If img2img argument is set and prompt is empty, use image variations mode + elif args.img2img and args.prompt == "": + for i in range(0, args.batch_count): + generate_image_variations( + config.lcm_diffusion_setting.init_image, args.strength + ) + else: + + if args.benchmark: + print("Initializing benchmark...") + bench_lcm_setting = config.lcm_diffusion_setting + bench_lcm_setting.prompt = "a cat" + bench_lcm_setting.use_tiny_auto_encoder = False + context.generate_text_to_image( + settings=config, + device=DEVICE, + ) + latencies = [] + + print("Starting benchmark please wait...") + for _ in range(3): + context.generate_text_to_image( + settings=config, + device=DEVICE, + ) + latencies.append(context.latency) + + avg_latency = sum(latencies) / 3 + + bench_lcm_setting.use_tiny_auto_encoder = True + + context.generate_text_to_image( + settings=config, + device=DEVICE, + ) + latencies = [] + for _ in range(3): + context.generate_text_to_image( + settings=config, + device=DEVICE, + ) + latencies.append(context.latency) + + avg_latency_taesd = sum(latencies) / 3 + + benchmark_name = "" + + if config.lcm_diffusion_setting.use_openvino: + benchmark_name = "OpenVINO" + else: + benchmark_name = "PyTorch" + + bench_model_id = "" + if bench_lcm_setting.use_openvino: + bench_model_id = bench_lcm_setting.openvino_lcm_model_id + elif bench_lcm_setting.use_lcm_lora: + bench_model_id = bench_lcm_setting.lcm_lora.base_model_id + else: + bench_model_id = bench_lcm_setting.lcm_model_id + + benchmark_result = [ + ["Device", f"{DEVICE.upper()},{get_device_name()}"], + ["Stable Diffusion Model", bench_model_id], + [ + "Image Size ", + f"{bench_lcm_setting.image_width}x{bench_lcm_setting.image_height}", + ], + [ + "Inference Steps", + f"{bench_lcm_setting.inference_steps}", + ], + [ + "Benchmark Passes", + 3, + ], + [ + "Average Latency", + f"{round(avg_latency,3)} sec", + ], + [ + "Average Latency(TAESD* enabled)", + f"{round(avg_latency_taesd,3)} sec", + ], + ] + print() + print( + f" FastSD Benchmark - {benchmark_name:8} " + ) + print(f"-" * 80) + for benchmark in benchmark_result: + print(f"{benchmark[0]:35} - {benchmark[1]}") + print(f"-" * 80) + print("*TAESD - Tiny AutoEncoder for Stable Diffusion") + + else: + for i in range(0, args.batch_count): + context.generate_text_to_image( + settings=config, + device=DEVICE, + ) diff --git a/app_settings.py b/app_settings.py new file mode 100644 index 0000000000000000000000000000000000000000..6ff71dbd208e8b747745bf7caec6abecbb09574f --- /dev/null +++ b/app_settings.py @@ -0,0 +1,94 @@ +import yaml +from os import path, makedirs +from models.settings import Settings +from paths import 
FastStableDiffusionPaths +from utils import get_models_from_text_file +from constants import ( + OPENVINO_LCM_MODELS_FILE, + LCM_LORA_MODELS_FILE, + SD_MODELS_FILE, + LCM_MODELS_FILE, +) +from copy import deepcopy + + +class AppSettings: + def __init__(self): + self.config_path = FastStableDiffusionPaths().get_app_settings_path() + self._stable_diffsuion_models = get_models_from_text_file( + FastStableDiffusionPaths().get_models_config_path(SD_MODELS_FILE) + ) + self._lcm_lora_models = get_models_from_text_file( + FastStableDiffusionPaths().get_models_config_path(LCM_LORA_MODELS_FILE) + ) + self._openvino_lcm_models = get_models_from_text_file( + FastStableDiffusionPaths().get_models_config_path(OPENVINO_LCM_MODELS_FILE) + ) + self._lcm_models = get_models_from_text_file( + FastStableDiffusionPaths().get_models_config_path(LCM_MODELS_FILE) + ) + self._config = None + + @property + def settings(self): + return self._config + + @property + def stable_diffsuion_models(self): + return self._stable_diffsuion_models + + @property + def openvino_lcm_models(self): + return self._openvino_lcm_models + + @property + def lcm_models(self): + return self._lcm_models + + @property + def lcm_lora_models(self): + return self._lcm_lora_models + + def load(self, skip_file=False): + if skip_file: + print("Skipping config file") + settings_dict = self._load_default() + self._config = Settings.model_validate(settings_dict) + else: + if not path.exists(self.config_path): + base_dir = path.dirname(self.config_path) + if not path.exists(base_dir): + makedirs(base_dir) + try: + print("Settings not found creating default settings") + with open(self.config_path, "w") as file: + yaml.dump( + self._load_default(), + file, + ) + except Exception as ex: + print(f"Error in creating settings : {ex}") + exit() + try: + with open(self.config_path) as file: + settings_dict = yaml.safe_load(file) + self._config = Settings.model_validate(settings_dict) + except Exception as ex: + print(f"Error in loading settings : {ex}") + + def save(self): + try: + with open(self.config_path, "w") as file: + tmp_cfg = deepcopy(self._config) + tmp_cfg.lcm_diffusion_setting.init_image = None + configurations = tmp_cfg.model_dump( + exclude=["init_image"], + ) + if configurations: + yaml.dump(configurations, file) + except Exception as ex: + print(f"Error in saving settings : {ex}") + + def _load_default(self) -> dict: + defult_config = Settings() + return defult_config.model_dump() diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/annotators/canny_control.py b/backend/annotators/canny_control.py new file mode 100644 index 0000000000000000000000000000000000000000..a9cd68d6c35180cac6e63c394add2cfac04ca283 --- /dev/null +++ b/backend/annotators/canny_control.py @@ -0,0 +1,15 @@ +import numpy as np +from backend.annotators.control_interface import ControlInterface +from cv2 import Canny +from PIL import Image + + +class CannyControl(ControlInterface): + def get_control_image(self, image: Image) -> Image: + low_threshold = 100 + high_threshold = 200 + image = np.array(image) + image = Canny(image, low_threshold, high_threshold) + image = image[:, :, None] + image = np.concatenate([image, image, image], axis=2) + return Image.fromarray(image) diff --git a/backend/annotators/control_interface.py b/backend/annotators/control_interface.py new file mode 100644 index 
0000000000000000000000000000000000000000..fc5caa62d9a1a938b11b2dc900331a2d2604c5f9 --- /dev/null +++ b/backend/annotators/control_interface.py @@ -0,0 +1,12 @@ +from abc import ABC, abstractmethod + +from PIL import Image + + +class ControlInterface(ABC): + @abstractmethod + def get_control_image( + self, + image: Image, + ) -> Image: + pass diff --git a/backend/annotators/depth_control.py b/backend/annotators/depth_control.py new file mode 100644 index 0000000000000000000000000000000000000000..cccba88810c9523872784c2372fca154334e1ad5 --- /dev/null +++ b/backend/annotators/depth_control.py @@ -0,0 +1,15 @@ +import numpy as np +from backend.annotators.control_interface import ControlInterface +from PIL import Image +from transformers import pipeline + + +class DepthControl(ControlInterface): + def get_control_image(self, image: Image) -> Image: + depth_estimator = pipeline("depth-estimation") + image = depth_estimator(image)["depth"] + image = np.array(image) + image = image[:, :, None] + image = np.concatenate([image, image, image], axis=2) + image = Image.fromarray(image) + return image diff --git a/backend/annotators/image_control_factory.py b/backend/annotators/image_control_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..4b2da4920974aa62e76f0a4d841478dedaf0d9b4 --- /dev/null +++ b/backend/annotators/image_control_factory.py @@ -0,0 +1,31 @@ +from backend.annotators.canny_control import CannyControl +from backend.annotators.depth_control import DepthControl +from backend.annotators.lineart_control import LineArtControl +from backend.annotators.mlsd_control import MlsdControl +from backend.annotators.normal_control import NormalControl +from backend.annotators.pose_control import PoseControl +from backend.annotators.shuffle_control import ShuffleControl +from backend.annotators.softedge_control import SoftEdgeControl + + +class ImageControlFactory: + def create_control(self, controlnet_type: str): + if controlnet_type == "Canny": + return CannyControl() + elif controlnet_type == "Pose": + return PoseControl() + elif controlnet_type == "MLSD": + return MlsdControl() + elif controlnet_type == "Depth": + return DepthControl() + elif controlnet_type == "LineArt": + return LineArtControl() + elif controlnet_type == "Shuffle": + return ShuffleControl() + elif controlnet_type == "NormalBAE": + return NormalControl() + elif controlnet_type == "SoftEdge": + return SoftEdgeControl() + else: + print("Error: Control type not implemented!") + raise Exception("Error: Control type not implemented!") diff --git a/backend/annotators/lineart_control.py b/backend/annotators/lineart_control.py new file mode 100644 index 0000000000000000000000000000000000000000..c6775b71f0a48decd66e732dd58763b198e593af --- /dev/null +++ b/backend/annotators/lineart_control.py @@ -0,0 +1,11 @@ +import numpy as np +from backend.annotators.control_interface import ControlInterface +from controlnet_aux import LineartDetector +from PIL import Image + + +class LineArtControl(ControlInterface): + def get_control_image(self, image: Image) -> Image: + processor = LineartDetector.from_pretrained("lllyasviel/Annotators") + control_image = processor(image) + return control_image diff --git a/backend/annotators/mlsd_control.py b/backend/annotators/mlsd_control.py new file mode 100644 index 0000000000000000000000000000000000000000..80c0debe0bf5b45011bd8d2b751abae5c1d53071 --- /dev/null +++ b/backend/annotators/mlsd_control.py @@ -0,0 +1,10 @@ +from backend.annotators.control_interface import 
ControlInterface +from controlnet_aux import MLSDdetector +from PIL import Image + + +class MlsdControl(ControlInterface): + def get_control_image(self, image: Image) -> Image: + mlsd = MLSDdetector.from_pretrained("lllyasviel/ControlNet") + image = mlsd(image) + return image diff --git a/backend/annotators/normal_control.py b/backend/annotators/normal_control.py new file mode 100644 index 0000000000000000000000000000000000000000..7f22ed68360c5cda458be0b64a0bfcc18cd7acc2 --- /dev/null +++ b/backend/annotators/normal_control.py @@ -0,0 +1,10 @@ +from backend.annotators.control_interface import ControlInterface +from controlnet_aux import NormalBaeDetector +from PIL import Image + + +class NormalControl(ControlInterface): + def get_control_image(self, image: Image) -> Image: + processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators") + control_image = processor(image) + return control_image diff --git a/backend/annotators/pose_control.py b/backend/annotators/pose_control.py new file mode 100644 index 0000000000000000000000000000000000000000..87ca92f2a029bbc6c7187c6eaa5a65bac298677a --- /dev/null +++ b/backend/annotators/pose_control.py @@ -0,0 +1,10 @@ +from backend.annotators.control_interface import ControlInterface +from controlnet_aux import OpenposeDetector +from PIL import Image + + +class PoseControl(ControlInterface): + def get_control_image(self, image: Image) -> Image: + openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet") + image = openpose(image) + return image diff --git a/backend/annotators/shuffle_control.py b/backend/annotators/shuffle_control.py new file mode 100644 index 0000000000000000000000000000000000000000..20c6e3dabedb17f22c8a38bd5b855d9b0591a6c1 --- /dev/null +++ b/backend/annotators/shuffle_control.py @@ -0,0 +1,10 @@ +from backend.annotators.control_interface import ControlInterface +from controlnet_aux import ContentShuffleDetector +from PIL import Image + + +class ShuffleControl(ControlInterface): + def get_control_image(self, image: Image) -> Image: + shuffle_processor = ContentShuffleDetector() + image = shuffle_processor(image) + return image diff --git a/backend/annotators/softedge_control.py b/backend/annotators/softedge_control.py new file mode 100644 index 0000000000000000000000000000000000000000..d11965712472588979b76932080a74b54c72fb14 --- /dev/null +++ b/backend/annotators/softedge_control.py @@ -0,0 +1,10 @@ +from backend.annotators.control_interface import ControlInterface +from controlnet_aux import PidiNetDetector +from PIL import Image + + +class SoftEdgeControl(ControlInterface): + def get_control_image(self, image: Image) -> Image: + processor = PidiNetDetector.from_pretrained("lllyasviel/Annotators") + control_image = processor(image) + return control_image diff --git a/backend/controlnet.py b/backend/controlnet.py new file mode 100644 index 0000000000000000000000000000000000000000..a3f01e45d23654e5c6bbd8ffabc425a7f18362e8 --- /dev/null +++ b/backend/controlnet.py @@ -0,0 +1,90 @@ +import logging +from PIL import Image +from diffusers import ControlNetModel +from backend.models.lcmdiffusion_setting import ( + DiffusionTask, + ControlNetSetting, +) + + +# Prepares ControlNet adapters for use with FastSD CPU +# +# This function loads the ControlNet adapters defined by the +# _lcm_diffusion_setting.controlnet_ object and returns a dictionary +# with the pipeline arguments required to use the loaded adapters +def load_controlnet_adapters(lcm_diffusion_setting) -> dict: + controlnet_args = {} + if ( + 
lcm_diffusion_setting.controlnet is None + or not lcm_diffusion_setting.controlnet.enabled + ): + return controlnet_args + + logging.info("Loading ControlNet adapter") + controlnet_adapter = ControlNetModel.from_single_file( + lcm_diffusion_setting.controlnet.adapter_path, + local_files_only=True, + use_safetensors=True, + ) + controlnet_args["controlnet"] = controlnet_adapter + return controlnet_args + + +# Updates the ControlNet pipeline arguments to use for image generation +# +# This function uses the contents of the _lcm_diffusion_setting.controlnet_ +# object to generate a dictionary with the corresponding pipeline arguments +# to be used for image generation; in particular, it sets the ControlNet control +# image and conditioning scale +def update_controlnet_arguments(lcm_diffusion_setting) -> dict: + controlnet_args = {} + if ( + lcm_diffusion_setting.controlnet is None + or not lcm_diffusion_setting.controlnet.enabled + ): + return controlnet_args + + controlnet_args["controlnet_conditioning_scale"] = ( + lcm_diffusion_setting.controlnet.conditioning_scale + ) + if lcm_diffusion_setting.diffusion_task == DiffusionTask.text_to_image.value: + controlnet_args["image"] = lcm_diffusion_setting.controlnet._control_image + elif lcm_diffusion_setting.diffusion_task == DiffusionTask.image_to_image.value: + controlnet_args["control_image"] = ( + lcm_diffusion_setting.controlnet._control_image + ) + return controlnet_args + + +# Helper function to adjust ControlNet settings from a dictionary +def controlnet_settings_from_dict( + lcm_diffusion_setting, + dictionary, +) -> None: + if lcm_diffusion_setting is None or dictionary is None: + logging.error("Invalid arguments!") + return + if ( + "controlnet" not in dictionary + or dictionary["controlnet"] is None + or len(dictionary["controlnet"]) == 0 + ): + logging.warning("ControlNet settings not found, ControlNet will be disabled") + lcm_diffusion_setting.controlnet = None + return + + controlnet = ControlNetSetting() + controlnet.enabled = dictionary["controlnet"][0]["enabled"] + controlnet.conditioning_scale = dictionary["controlnet"][0]["conditioning_scale"] + controlnet.adapter_path = dictionary["controlnet"][0]["adapter_path"] + controlnet._control_image = None + image_path = dictionary["controlnet"][0]["control_image"] + if controlnet.enabled: + try: + controlnet._control_image = Image.open(image_path) + except (AttributeError, FileNotFoundError) as err: + print(err) + if controlnet._control_image is None: + logging.error("Wrong ControlNet control image! 
Disabling ControlNet") + controlnet.enabled = False + lcm_diffusion_setting.controlnet = controlnet diff --git a/backend/device.py b/backend/device.py new file mode 100644 index 0000000000000000000000000000000000000000..cacb2a5197eae85eb2ec7e8bf1df25f6fe62202c --- /dev/null +++ b/backend/device.py @@ -0,0 +1,23 @@ +import platform +from constants import DEVICE +import torch +import openvino as ov + +core = ov.Core() + + +def is_openvino_device() -> bool: + if DEVICE.lower() == "cpu" or DEVICE.lower()[0] == "g" or DEVICE.lower()[0] == "n": + return True + else: + return False + + +def get_device_name() -> str: + if DEVICE == "cuda" or DEVICE == "mps": + default_gpu_index = torch.cuda.current_device() + return torch.cuda.get_device_name(default_gpu_index) + elif platform.system().lower() == "darwin": + return platform.processor() + elif is_openvino_device(): + return core.get_property(DEVICE.upper(), "FULL_DEVICE_NAME") diff --git a/backend/image_saver.py b/backend/image_saver.py new file mode 100644 index 0000000000000000000000000000000000000000..40322d443e903c7c5a988b750a4c96029b71246b --- /dev/null +++ b/backend/image_saver.py @@ -0,0 +1,60 @@ +import json +from os import path, mkdir +from typing import Any +from uuid import uuid4 +from backend.models.lcmdiffusion_setting import LCMDiffusionSetting +from utils import get_image_file_extension + + +def get_exclude_keys(): + exclude_keys = { + "init_image": True, + "generated_images": True, + "lora": { + "models_dir": True, + "path": True, + }, + "dirs": True, + "controlnet": { + "adapter_path": True, + }, + } + return exclude_keys + + +class ImageSaver: + @staticmethod + def save_images( + output_path: str, + images: Any, + folder_name: str = "", + format: str = "PNG", + lcm_diffusion_setting: LCMDiffusionSetting = None, + ) -> None: + gen_id = uuid4() + + for index, image in enumerate(images): + if not path.exists(output_path): + mkdir(output_path) + + if folder_name: + out_path = path.join( + output_path, + folder_name, + ) + else: + out_path = output_path + + if not path.exists(out_path): + mkdir(out_path) + image_extension = get_image_file_extension(format) + image.save(path.join(out_path, f"{gen_id}-{index+1}{image_extension}")) + if lcm_diffusion_setting: + with open(path.join(out_path, f"{gen_id}.json"), "w") as json_file: + json.dump( + lcm_diffusion_setting.model_dump( + exclude=get_exclude_keys(), + ), + json_file, + indent=4, + ) diff --git a/backend/lcm_text_to_image.py b/backend/lcm_text_to_image.py new file mode 100644 index 0000000000000000000000000000000000000000..580edd51a46d07f84aed6559fa240ba17131a0e2 --- /dev/null +++ b/backend/lcm_text_to_image.py @@ -0,0 +1,383 @@ +import gc +from math import ceil +from typing import Any + +import numpy as np +import torch +import logging +from backend.device import is_openvino_device +from backend.lora import load_lora_weight +from backend.controlnet import ( + load_controlnet_adapters, + update_controlnet_arguments, +) +from backend.models.lcmdiffusion_setting import ( + DiffusionTask, + LCMDiffusionSetting, + LCMLora, +) +from backend.openvino.pipelines import ( + get_ov_image_to_image_pipeline, + get_ov_text_to_image_pipeline, + ov_load_taesd, +) +from backend.pipelines.lcm import ( + get_image_to_image_pipeline, + get_lcm_model_pipeline, + load_taesd, +) +from backend.pipelines.lcm_lora import get_lcm_lora_pipeline +from constants import DEVICE +from diffusers import LCMScheduler +from image_ops import resize_pil_image + + +class LCMTextToImage: + def __init__( + self, + device: 
str = "cpu", + ) -> None: + self.pipeline = None + self.use_openvino = False + self.device = "" + self.previous_model_id = None + self.previous_use_tae_sd = False + self.previous_use_lcm_lora = False + self.previous_ov_model_id = "" + self.previous_safety_checker = False + self.previous_use_openvino = False + self.img_to_img_pipeline = None + self.is_openvino_init = False + self.previous_lora = None + self.task_type = DiffusionTask.text_to_image + self.torch_data_type = ( + torch.float32 if is_openvino_device() or DEVICE == "mps" else torch.float16 + ) + print(f"Torch datatype : {self.torch_data_type}") + + def _pipeline_to_device(self): + print(f"Pipeline device : {DEVICE}") + print(f"Pipeline dtype : {self.torch_data_type}") + self.pipeline.to( + torch_device=DEVICE, + torch_dtype=self.torch_data_type, + ) + + def _add_freeu(self): + pipeline_class = self.pipeline.__class__.__name__ + if isinstance(self.pipeline.scheduler, LCMScheduler): + if pipeline_class == "StableDiffusionPipeline": + print("Add FreeU - SD") + self.pipeline.enable_freeu( + s1=0.9, + s2=0.2, + b1=1.2, + b2=1.4, + ) + elif pipeline_class == "StableDiffusionXLPipeline": + print("Add FreeU - SDXL") + self.pipeline.enable_freeu( + s1=0.6, + s2=0.4, + b1=1.1, + b2=1.2, + ) + + def _update_lcm_scheduler_params(self): + if isinstance(self.pipeline.scheduler, LCMScheduler): + self.pipeline.scheduler = LCMScheduler.from_config( + self.pipeline.scheduler.config, + beta_start=0.001, + beta_end=0.01, + ) + + def init( + self, + device: str = "cpu", + lcm_diffusion_setting: LCMDiffusionSetting = LCMDiffusionSetting(), + ) -> None: + self.device = device + self.use_openvino = lcm_diffusion_setting.use_openvino + model_id = lcm_diffusion_setting.lcm_model_id + use_local_model = lcm_diffusion_setting.use_offline_model + use_tiny_auto_encoder = lcm_diffusion_setting.use_tiny_auto_encoder + use_lora = lcm_diffusion_setting.use_lcm_lora + lcm_lora: LCMLora = lcm_diffusion_setting.lcm_lora + ov_model_id = lcm_diffusion_setting.openvino_lcm_model_id + + if lcm_diffusion_setting.diffusion_task == DiffusionTask.image_to_image.value: + lcm_diffusion_setting.init_image = resize_pil_image( + lcm_diffusion_setting.init_image, + lcm_diffusion_setting.image_width, + lcm_diffusion_setting.image_height, + ) + + if ( + self.pipeline is None + or self.previous_model_id != model_id + or self.previous_use_tae_sd != use_tiny_auto_encoder + or self.previous_lcm_lora_base_id != lcm_lora.base_model_id + or self.previous_lcm_lora_id != lcm_lora.lcm_lora_id + or self.previous_use_lcm_lora != use_lora + or self.previous_ov_model_id != ov_model_id + or self.previous_safety_checker != lcm_diffusion_setting.use_safety_checker + or self.previous_use_openvino != lcm_diffusion_setting.use_openvino + or ( + self.use_openvino + and ( + self.previous_task_type != lcm_diffusion_setting.diffusion_task + or self.previous_lora != lcm_diffusion_setting.lora + ) + ) + or lcm_diffusion_setting.rebuild_pipeline + ): + if self.use_openvino and is_openvino_device(): + if self.pipeline: + del self.pipeline + self.pipeline = None + gc.collect() + self.is_openvino_init = True + if ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.text_to_image.value + ): + print(f"***** Init Text to image (OpenVINO) - {ov_model_id} *****") + self.pipeline = get_ov_text_to_image_pipeline( + ov_model_id, + use_local_model, + ) + elif ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.image_to_image.value + ): + print(f"***** Image to image (OpenVINO) - {ov_model_id} *****") + 
self.pipeline = get_ov_image_to_image_pipeline( + ov_model_id, + use_local_model, + ) + else: + if self.pipeline: + del self.pipeline + self.pipeline = None + if self.img_to_img_pipeline: + del self.img_to_img_pipeline + self.img_to_img_pipeline = None + + controlnet_args = load_controlnet_adapters(lcm_diffusion_setting) + if use_lora: + print( + f"***** Init LCM-LoRA pipeline - {lcm_lora.base_model_id} *****" + ) + self.pipeline = get_lcm_lora_pipeline( + lcm_lora.base_model_id, + lcm_lora.lcm_lora_id, + use_local_model, + torch_data_type=self.torch_data_type, + pipeline_args=controlnet_args, + ) + + else: + print(f"***** Init LCM Model pipeline - {model_id} *****") + self.pipeline = get_lcm_model_pipeline( + model_id, + use_local_model, + controlnet_args, + ) + + self.img_to_img_pipeline = get_image_to_image_pipeline(self.pipeline) + + if use_tiny_auto_encoder: + if self.use_openvino and is_openvino_device(): + print("Using Tiny Auto Encoder (OpenVINO)") + ov_load_taesd( + self.pipeline, + use_local_model, + ) + else: + print("Using Tiny Auto Encoder") + load_taesd( + self.pipeline, + use_local_model, + self.torch_data_type, + ) + load_taesd( + self.img_to_img_pipeline, + use_local_model, + self.torch_data_type, + ) + + if not self.use_openvino and not is_openvino_device(): + self._pipeline_to_device() + + if ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.image_to_image.value + and lcm_diffusion_setting.use_openvino + ): + self.pipeline.scheduler = LCMScheduler.from_config( + self.pipeline.scheduler.config, + ) + else: + self._update_lcm_scheduler_params() + + if use_lora: + self._add_freeu() + + self.previous_model_id = model_id + self.previous_ov_model_id = ov_model_id + self.previous_use_tae_sd = use_tiny_auto_encoder + self.previous_lcm_lora_base_id = lcm_lora.base_model_id + self.previous_lcm_lora_id = lcm_lora.lcm_lora_id + self.previous_use_lcm_lora = use_lora + self.previous_safety_checker = lcm_diffusion_setting.use_safety_checker + self.previous_use_openvino = lcm_diffusion_setting.use_openvino + self.previous_task_type = lcm_diffusion_setting.diffusion_task + self.previous_lora = lcm_diffusion_setting.lora.model_copy(deep=True) + lcm_diffusion_setting.rebuild_pipeline = False + if ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.text_to_image.value + ): + print(f"Pipeline : {self.pipeline}") + elif ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.image_to_image.value + ): + if self.use_openvino and is_openvino_device(): + print(f"Pipeline : {self.pipeline}") + else: + print(f"Pipeline : {self.img_to_img_pipeline}") + if self.use_openvino: + if lcm_diffusion_setting.lora.enabled: + print("Warning: Lora models not supported on OpenVINO mode") + else: + adapters = self.pipeline.get_active_adapters() + print(f"Active adapters : {adapters}") + + def _get_timesteps(self): + time_steps = self.pipeline.scheduler.config.get("timesteps") + time_steps_value = [int(time_steps)] if time_steps else None + return time_steps_value + + def generate( + self, + lcm_diffusion_setting: LCMDiffusionSetting, + reshape: bool = False, + ) -> Any: + guidance_scale = lcm_diffusion_setting.guidance_scale + img_to_img_inference_steps = lcm_diffusion_setting.inference_steps + check_step_value = int( + lcm_diffusion_setting.inference_steps * lcm_diffusion_setting.strength + ) + if ( + lcm_diffusion_setting.diffusion_task == DiffusionTask.image_to_image.value + and check_step_value < 1 + ): + img_to_img_inference_steps = ceil(1 / lcm_diffusion_setting.strength) + print( + 
f"Strength: {lcm_diffusion_setting.strength},{img_to_img_inference_steps}" + ) + + if lcm_diffusion_setting.use_seed: + cur_seed = lcm_diffusion_setting.seed + if self.use_openvino: + np.random.seed(cur_seed) + else: + torch.manual_seed(cur_seed) + + is_openvino_pipe = lcm_diffusion_setting.use_openvino and is_openvino_device() + if is_openvino_pipe: + print("Using OpenVINO") + if reshape and not self.is_openvino_init: + print("Reshape and compile") + self.pipeline.reshape( + batch_size=-1, + height=lcm_diffusion_setting.image_height, + width=lcm_diffusion_setting.image_width, + num_images_per_prompt=lcm_diffusion_setting.number_of_images, + ) + self.pipeline.compile() + + if self.is_openvino_init: + self.is_openvino_init = False + + if not lcm_diffusion_setting.use_safety_checker: + self.pipeline.safety_checker = None + if ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.image_to_image.value + and not is_openvino_pipe + ): + self.img_to_img_pipeline.safety_checker = None + + if ( + not lcm_diffusion_setting.use_lcm_lora + and not lcm_diffusion_setting.use_openvino + and lcm_diffusion_setting.guidance_scale != 1.0 + ): + print("Not using LCM-LoRA so setting guidance_scale 1.0") + guidance_scale = 1.0 + + controlnet_args = update_controlnet_arguments(lcm_diffusion_setting) + if lcm_diffusion_setting.use_openvino: + if ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.text_to_image.value + ): + result_images = self.pipeline( + prompt=lcm_diffusion_setting.prompt, + negative_prompt=lcm_diffusion_setting.negative_prompt, + num_inference_steps=lcm_diffusion_setting.inference_steps, + guidance_scale=guidance_scale, + width=lcm_diffusion_setting.image_width, + height=lcm_diffusion_setting.image_height, + num_images_per_prompt=lcm_diffusion_setting.number_of_images, + ).images + elif ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.image_to_image.value + ): + result_images = self.pipeline( + image=lcm_diffusion_setting.init_image, + strength=lcm_diffusion_setting.strength, + prompt=lcm_diffusion_setting.prompt, + negative_prompt=lcm_diffusion_setting.negative_prompt, + num_inference_steps=img_to_img_inference_steps * 3, + guidance_scale=guidance_scale, + num_images_per_prompt=lcm_diffusion_setting.number_of_images, + ).images + + else: + if ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.text_to_image.value + ): + result_images = self.pipeline( + prompt=lcm_diffusion_setting.prompt, + negative_prompt=lcm_diffusion_setting.negative_prompt, + num_inference_steps=lcm_diffusion_setting.inference_steps, + guidance_scale=guidance_scale, + width=lcm_diffusion_setting.image_width, + height=lcm_diffusion_setting.image_height, + num_images_per_prompt=lcm_diffusion_setting.number_of_images, + timesteps=self._get_timesteps(), + **controlnet_args, + ).images + + elif ( + lcm_diffusion_setting.diffusion_task + == DiffusionTask.image_to_image.value + ): + result_images = self.img_to_img_pipeline( + image=lcm_diffusion_setting.init_image, + strength=lcm_diffusion_setting.strength, + prompt=lcm_diffusion_setting.prompt, + negative_prompt=lcm_diffusion_setting.negative_prompt, + num_inference_steps=img_to_img_inference_steps, + guidance_scale=guidance_scale, + width=lcm_diffusion_setting.image_width, + height=lcm_diffusion_setting.image_height, + num_images_per_prompt=lcm_diffusion_setting.number_of_images, + **controlnet_args, + ).images + return result_images diff --git a/backend/lora.py b/backend/lora.py new file mode 100644 index 
0000000000000000000000000000000000000000..a81dac28546cda9a8de199ea427b7f6e1ba8ccb8 --- /dev/null +++ b/backend/lora.py @@ -0,0 +1,136 @@ +import glob +from os import path +from paths import get_file_name, FastStableDiffusionPaths +from pathlib import Path + + +# A basic class to keep track of the currently loaded LoRAs and +# their weights; the diffusers funtion \c get_active_adapters() +# returns a list of adapter names but not their weights so we need +# a way to keep track of the current LoRA weights to set whenever +# a new LoRA is loaded +class _lora_info: + def __init__( + self, + path: str, + weight: float, + ): + self.path = path + self.adapter_name = get_file_name(path) + self.weight = weight + + def __del__(self): + self.path = None + self.adapter_name = None + + +_loaded_loras = [] +_current_pipeline = None + + +# This function loads a LoRA from the LoRA path setting, so it's +# possible to load multiple LoRAs by calling this function more than +# once with a different LoRA path setting; note that if you plan to +# load multiple LoRAs and dynamically change their weights, you +# might want to set the LoRA fuse option to False +def load_lora_weight( + pipeline, + lcm_diffusion_setting, +): + if not lcm_diffusion_setting.lora.path: + raise Exception("Empty lora model path") + + if not path.exists(lcm_diffusion_setting.lora.path): + raise Exception("Lora model path is invalid") + + # If the pipeline has been rebuilt since the last call, remove all + # references to previously loaded LoRAs and store the new pipeline + global _loaded_loras + global _current_pipeline + if pipeline != _current_pipeline: + for lora in _loaded_loras: + del lora + del _loaded_loras + _loaded_loras = [] + _current_pipeline = pipeline + + current_lora = _lora_info( + lcm_diffusion_setting.lora.path, + lcm_diffusion_setting.lora.weight, + ) + _loaded_loras.append(current_lora) + + if lcm_diffusion_setting.lora.enabled: + print(f"LoRA adapter name : {current_lora.adapter_name}") + pipeline.load_lora_weights( + FastStableDiffusionPaths.get_lora_models_path(), + weight_name=Path(lcm_diffusion_setting.lora.path).name, + local_files_only=True, + adapter_name=current_lora.adapter_name, + ) + update_lora_weights( + pipeline, + lcm_diffusion_setting, + ) + + if lcm_diffusion_setting.lora.fuse: + pipeline.fuse_lora() + + +def get_lora_models(root_dir: str): + lora_models = glob.glob(f"{root_dir}/**/*.safetensors", recursive=True) + lora_models_map = {} + for file_path in lora_models: + lora_name = get_file_name(file_path) + if lora_name is not None: + lora_models_map[lora_name] = file_path + return lora_models_map + + +# This function returns a list of (adapter_name, weight) tuples for the +# currently loaded LoRAs +def get_active_lora_weights(): + active_loras = [] + for lora_info in _loaded_loras: + active_loras.append( + ( + lora_info.adapter_name, + lora_info.weight, + ) + ) + return active_loras + + +# This function receives a pipeline, an lcm_diffusion_setting object and +# an optional list of updated (adapter_name, weight) tuples +def update_lora_weights( + pipeline, + lcm_diffusion_setting, + lora_weights=None, +): + global _loaded_loras + global _current_pipeline + if pipeline != _current_pipeline: + print("Wrong pipeline when trying to update LoRA weights") + return + if lora_weights: + for idx, lora in enumerate(lora_weights): + if _loaded_loras[idx].adapter_name != lora[0]: + print("Wrong adapter name in LoRA enumeration!") + continue + _loaded_loras[idx].weight = lora[1] + + adapter_names = [] + 
adapter_weights = [] + if lcm_diffusion_setting.use_lcm_lora: + adapter_names.append("lcm") + adapter_weights.append(1.0) + for lora in _loaded_loras: + adapter_names.append(lora.adapter_name) + adapter_weights.append(lora.weight) + pipeline.set_adapters( + adapter_names, + adapter_weights=adapter_weights, + ) + adapater_weights = zip(adapter_names, adapter_weights) + print(f"Adapters: {list(adapater_weights)}") diff --git a/backend/models/gen_images.py b/backend/models/gen_images.py new file mode 100644 index 0000000000000000000000000000000000000000..a70463adfbe1c8de15dfb70d472bd3bc24d53459 --- /dev/null +++ b/backend/models/gen_images.py @@ -0,0 +1,16 @@ +from pydantic import BaseModel +from enum import Enum, auto +from paths import FastStableDiffusionPaths + + +class ImageFormat(str, Enum): + """Image format""" + + JPEG = "jpeg" + PNG = "png" + + +class GeneratedImages(BaseModel): + path: str = FastStableDiffusionPaths.get_results_path() + format: str = ImageFormat.PNG.value.upper() + save_image: bool = True diff --git a/backend/models/lcmdiffusion_setting.py b/backend/models/lcmdiffusion_setting.py new file mode 100644 index 0000000000000000000000000000000000000000..1b99fcdffc3973ae8ecc8525cf3c076716568db0 --- /dev/null +++ b/backend/models/lcmdiffusion_setting.py @@ -0,0 +1,64 @@ +from enum import Enum +from PIL import Image +from typing import Any, Optional, Union + +from constants import LCM_DEFAULT_MODEL, LCM_DEFAULT_MODEL_OPENVINO +from paths import FastStableDiffusionPaths +from pydantic import BaseModel + + +class LCMLora(BaseModel): + base_model_id: str = "Lykon/dreamshaper-8" + lcm_lora_id: str = "latent-consistency/lcm-lora-sdv1-5" + + +class DiffusionTask(str, Enum): + """Diffusion task types""" + + text_to_image = "text_to_image" + image_to_image = "image_to_image" + + +class Lora(BaseModel): + models_dir: str = FastStableDiffusionPaths.get_lora_models_path() + path: Optional[Any] = None + weight: Optional[float] = 0.5 + fuse: bool = True + enabled: bool = False + + +class ControlNetSetting(BaseModel): + adapter_path: Optional[str] = None # ControlNet adapter path + conditioning_scale: float = 0.5 + enabled: bool = False + _control_image: Image = None # Control image, PIL image + + +class LCMDiffusionSetting(BaseModel): + lcm_model_id: str = LCM_DEFAULT_MODEL + openvino_lcm_model_id: str = LCM_DEFAULT_MODEL_OPENVINO + use_offline_model: bool = False + use_lcm_lora: bool = False + lcm_lora: Optional[LCMLora] = LCMLora() + use_tiny_auto_encoder: bool = False + use_openvino: bool = False + prompt: str = "" + negative_prompt: str = "" + init_image: Any = None + strength: Optional[float] = 0.6 + image_height: Optional[int] = 512 + image_width: Optional[int] = 512 + inference_steps: Optional[int] = 1 + guidance_scale: Optional[float] = 1 + number_of_images: Optional[int] = 1 + seed: Optional[int] = 123123 + use_seed: bool = False + use_safety_checker: bool = False + diffusion_task: str = DiffusionTask.text_to_image.value + lora: Optional[Lora] = Lora() + controlnet: Optional[Union[ControlNetSetting, list[ControlNetSetting]]] = None + dirs: dict = { + "controlnet": FastStableDiffusionPaths.get_controlnet_models_path(), + "lora": FastStableDiffusionPaths.get_lora_models_path(), + } + rebuild_pipeline: bool = False diff --git a/backend/models/upscale.py b/backend/models/upscale.py new file mode 100644 index 0000000000000000000000000000000000000000..5b9072f0b3359377f8252121afb817df2af3fe3f --- /dev/null +++ b/backend/models/upscale.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class 
UpscaleMode(str, Enum): + """Diffusion task types""" + + normal = "normal" + sd_upscale = "sd_upscale" diff --git a/backend/openvino/custom_ov_model_vae_decoder.py b/backend/openvino/custom_ov_model_vae_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..ef83fb079f9956c80043cab04a65e114f7e56c66 --- /dev/null +++ b/backend/openvino/custom_ov_model_vae_decoder.py @@ -0,0 +1,21 @@ +from backend.device import is_openvino_device + +if is_openvino_device(): + from optimum.intel.openvino.modeling_diffusion import OVModelVaeDecoder + + +class CustomOVModelVaeDecoder(OVModelVaeDecoder): + def __init__( + self, + model, + parent_model, + ov_config=None, + model_dir=None, + ): + super(OVModelVaeDecoder, self).__init__( + model, + parent_model, + ov_config, + "vae_decoder", + model_dir, + ) diff --git a/backend/openvino/pipelines.py b/backend/openvino/pipelines.py new file mode 100644 index 0000000000000000000000000000000000000000..62d936dd7426bbe1dd7f43376bbfa61089cf0a8a --- /dev/null +++ b/backend/openvino/pipelines.py @@ -0,0 +1,75 @@ +from constants import DEVICE, LCM_DEFAULT_MODEL_OPENVINO +from backend.tiny_decoder import get_tiny_decoder_vae_model +from typing import Any +from backend.device import is_openvino_device +from paths import get_base_folder_name + +if is_openvino_device(): + from huggingface_hub import snapshot_download + from optimum.intel.openvino.modeling_diffusion import OVBaseModel + + from optimum.intel.openvino.modeling_diffusion import ( + OVStableDiffusionPipeline, + OVStableDiffusionImg2ImgPipeline, + OVStableDiffusionXLPipeline, + OVStableDiffusionXLImg2ImgPipeline, + ) + from backend.openvino.custom_ov_model_vae_decoder import CustomOVModelVaeDecoder + + +def ov_load_taesd( + pipeline: Any, + use_local_model: bool = False, +): + taesd_dir = snapshot_download( + repo_id=get_tiny_decoder_vae_model(pipeline.__class__.__name__), + local_files_only=use_local_model, + ) + pipeline.vae_decoder = CustomOVModelVaeDecoder( + model=OVBaseModel.load_model(f"{taesd_dir}/vae_decoder/openvino_model.xml"), + parent_model=pipeline, + model_dir=taesd_dir, + ) + + +def get_ov_text_to_image_pipeline( + model_id: str = LCM_DEFAULT_MODEL_OPENVINO, + use_local_model: bool = False, +) -> Any: + if "xl" in get_base_folder_name(model_id).lower(): + pipeline = OVStableDiffusionXLPipeline.from_pretrained( + model_id, + local_files_only=use_local_model, + ov_config={"CACHE_DIR": ""}, + device=DEVICE.upper(), + ) + else: + pipeline = OVStableDiffusionPipeline.from_pretrained( + model_id, + local_files_only=use_local_model, + ov_config={"CACHE_DIR": ""}, + device=DEVICE.upper(), + ) + + return pipeline + + +def get_ov_image_to_image_pipeline( + model_id: str = LCM_DEFAULT_MODEL_OPENVINO, + use_local_model: bool = False, +) -> Any: + if "xl" in get_base_folder_name(model_id).lower(): + pipeline = OVStableDiffusionXLImg2ImgPipeline.from_pretrained( + model_id, + local_files_only=use_local_model, + ov_config={"CACHE_DIR": ""}, + device=DEVICE.upper(), + ) + else: + pipeline = OVStableDiffusionImg2ImgPipeline.from_pretrained( + model_id, + local_files_only=use_local_model, + ov_config={"CACHE_DIR": ""}, + device=DEVICE.upper(), + ) + return pipeline diff --git a/backend/pipelines/lcm.py b/backend/pipelines/lcm.py new file mode 100644 index 0000000000000000000000000000000000000000..30f0c9bd21347007e4d6d5fb93c7473590868588 --- /dev/null +++ b/backend/pipelines/lcm.py @@ -0,0 +1,100 @@ +from constants import LCM_DEFAULT_MODEL +from diffusers import ( + DiffusionPipeline, + 
AutoencoderTiny, + UNet2DConditionModel, + LCMScheduler, +) +import torch +from backend.tiny_decoder import get_tiny_decoder_vae_model +from typing import Any +from diffusers import ( + LCMScheduler, + StableDiffusionImg2ImgPipeline, + StableDiffusionXLImg2ImgPipeline, + AutoPipelineForText2Image, + AutoPipelineForImage2Image, + StableDiffusionControlNetPipeline, +) + + +def _get_lcm_pipeline_from_base_model( + lcm_model_id: str, + base_model_id: str, + use_local_model: bool, +): + pipeline = None + unet = UNet2DConditionModel.from_pretrained( + lcm_model_id, + torch_dtype=torch.float32, + local_files_only=use_local_model, + resume_download=True, + ) + pipeline = DiffusionPipeline.from_pretrained( + base_model_id, + unet=unet, + torch_dtype=torch.float32, + local_files_only=use_local_model, + resume_download=True, + ) + pipeline.scheduler = LCMScheduler.from_config(pipeline.scheduler.config) + return pipeline + + +def load_taesd( + pipeline: Any, + use_local_model: bool = False, + torch_data_type: torch.dtype = torch.float32, +): + vae_model = get_tiny_decoder_vae_model(pipeline.__class__.__name__) + pipeline.vae = AutoencoderTiny.from_pretrained( + vae_model, + torch_dtype=torch_data_type, + local_files_only=use_local_model, + ) + + +def get_lcm_model_pipeline( + model_id: str = LCM_DEFAULT_MODEL, + use_local_model: bool = False, + pipeline_args={}, +): + pipeline = None + if model_id == "latent-consistency/lcm-sdxl": + pipeline = _get_lcm_pipeline_from_base_model( + model_id, + "stabilityai/stable-diffusion-xl-base-1.0", + use_local_model, + ) + + elif model_id == "latent-consistency/lcm-ssd-1b": + pipeline = _get_lcm_pipeline_from_base_model( + model_id, + "segmind/SSD-1B", + use_local_model, + ) + else: + # pipeline = DiffusionPipeline.from_pretrained( + pipeline = AutoPipelineForText2Image.from_pretrained( + model_id, + local_files_only=use_local_model, + **pipeline_args, + ) + + return pipeline + + +def get_image_to_image_pipeline(pipeline: Any) -> Any: + components = pipeline.components + pipeline_class = pipeline.__class__.__name__ + if ( + pipeline_class == "LatentConsistencyModelPipeline" + or pipeline_class == "StableDiffusionPipeline" + ): + return StableDiffusionImg2ImgPipeline(**components) + elif pipeline_class == "StableDiffusionControlNetPipeline": + return AutoPipelineForImage2Image.from_pipe(pipeline) + elif pipeline_class == "StableDiffusionXLPipeline": + return StableDiffusionXLImg2ImgPipeline(**components) + else: + raise Exception(f"Unknown pipeline {pipeline_class}") diff --git a/backend/pipelines/lcm_lora.py b/backend/pipelines/lcm_lora.py new file mode 100644 index 0000000000000000000000000000000000000000..17682326d6cbd5040143dc5aed78af94602f6d13 --- /dev/null +++ b/backend/pipelines/lcm_lora.py @@ -0,0 +1,47 @@ +import torch +from diffusers import DiffusionPipeline, LCMScheduler, AutoPipelineForText2Image + + +def load_lcm_weights( + pipeline, + use_local_model, + lcm_lora_id, +): + kwargs = { + "local_files_only": use_local_model, + "weight_name": "pytorch_lora_weights.safetensors", + } + pipeline.load_lora_weights( + lcm_lora_id, + **kwargs, + adapter_name="lcm", + ) + + +def get_lcm_lora_pipeline( + base_model_id: str, + lcm_lora_id: str, + use_local_model: bool, + torch_data_type: torch.dtype, + pipeline_args={}, +): + # pipeline = DiffusionPipeline.from_pretrained( + pipeline = AutoPipelineForText2Image.from_pretrained( + base_model_id, + torch_dtype=torch_data_type, + local_files_only=use_local_model, + **pipeline_args, + ) + + load_lcm_weights( + pipeline, 
+ use_local_model, + lcm_lora_id, + ) + + if "lcm" in lcm_lora_id.lower() or "hypersd" in lcm_lora_id.lower(): + print("LCM LoRA model detected so using recommended LCMScheduler") + pipeline.scheduler = LCMScheduler.from_config(pipeline.scheduler.config) + + pipeline.unet.to(memory_format=torch.channels_last) + return pipeline diff --git a/backend/tiny_decoder.py b/backend/tiny_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..957cfcbff65cc22b38450462e052fba50e4d764f --- /dev/null +++ b/backend/tiny_decoder.py @@ -0,0 +1,32 @@ +from constants import ( + TAESD_MODEL, + TAESDXL_MODEL, + TAESD_MODEL_OPENVINO, + TAESDXL_MODEL_OPENVINO, +) + + +def get_tiny_decoder_vae_model(pipeline_class) -> str: + print(f"Pipeline class : {pipeline_class}") + if ( + pipeline_class == "LatentConsistencyModelPipeline" + or pipeline_class == "StableDiffusionPipeline" + or pipeline_class == "StableDiffusionImg2ImgPipeline" + or pipeline_class == "StableDiffusionControlNetPipeline" + or pipeline_class == "StableDiffusionControlNetImg2ImgPipeline" + ): + return TAESD_MODEL + elif ( + pipeline_class == "StableDiffusionXLPipeline" + or pipeline_class == "StableDiffusionXLImg2ImgPipeline" + ): + return TAESDXL_MODEL + elif ( + pipeline_class == "OVStableDiffusionPipeline" + or pipeline_class == "OVStableDiffusionImg2ImgPipeline" + ): + return TAESD_MODEL_OPENVINO + elif pipeline_class == "OVStableDiffusionXLPipeline": + return TAESDXL_MODEL_OPENVINO + else: + raise Exception("No valid pipeline class found!") diff --git a/backend/upscale/edsr_upscale_onnx.py b/backend/upscale/edsr_upscale_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..f837d932b813edc1b5a215978fc1766150b7c436 --- /dev/null +++ b/backend/upscale/edsr_upscale_onnx.py @@ -0,0 +1,37 @@ +import numpy as np +import onnxruntime +from huggingface_hub import hf_hub_download +from PIL import Image + + +def upscale_edsr_2x(image_path: str): + input_image = Image.open(image_path).convert("RGB") + input_image = np.array(input_image).astype("float32") + input_image = np.transpose(input_image, (2, 0, 1)) + img_arr = np.expand_dims(input_image, axis=0) + + if np.max(img_arr) > 256: # 16-bit image + max_range = 65535 + else: + max_range = 255.0 + img = img_arr / max_range + + model_path = hf_hub_download( + repo_id="rupeshs/edsr-onnx", + filename="edsr_onnxsim_2x.onnx", + ) + sess = onnxruntime.InferenceSession(model_path) + + input_name = sess.get_inputs()[0].name + output_name = sess.get_outputs()[0].name + output = sess.run( + [output_name], + {input_name: img}, + )[0] + + result = output.squeeze() + result = result.clip(0, 1) + image_array = np.transpose(result, (1, 2, 0)) + image_array = np.uint8(image_array * 255) + upscaled_image = Image.fromarray(image_array) + return upscaled_image diff --git a/backend/upscale/tiled_upscale.py b/backend/upscale/tiled_upscale.py new file mode 100644 index 0000000000000000000000000000000000000000..4af91ff1971b9c0c2d00a3ebcddb79d677109205 --- /dev/null +++ b/backend/upscale/tiled_upscale.py @@ -0,0 +1,238 @@ +import time +import math +import logging +from PIL import Image, ImageDraw, ImageFilter +from backend.models.lcmdiffusion_setting import DiffusionTask +from context import Context +from constants import DEVICE + + +def generate_upscaled_image( + config, + input_path=None, + strength=0.3, + scale_factor=2.0, + tile_overlap=16, + upscale_settings=None, + context: Context = None, + output_path=None, + image_format="PNG", +): + if config == None or ( + input_path == 
None or input_path == "" and upscale_settings == None + ): + logging.error("Wrong arguments in tiled upscale function call!") + return + + # Use the upscale_settings dict if provided; otherwise, build the + # upscale_settings dict using the function arguments and default values + if upscale_settings == None: + upscale_settings = { + "source_file": input_path, + "target_file": None, + "output_format": image_format, + "strength": strength, + "scale_factor": scale_factor, + "prompt": config.lcm_diffusion_setting.prompt, + "tile_overlap": tile_overlap, + "tile_size": 256, + "tiles": [], + } + source_image = Image.open(input_path) # PIL image + else: + source_image = Image.open(upscale_settings["source_file"]) + + upscale_settings["source_image"] = source_image + + if upscale_settings["target_file"]: + result = Image.open(upscale_settings["target_file"]) + else: + result = Image.new( + mode="RGBA", + size=( + source_image.size[0] * int(upscale_settings["scale_factor"]), + source_image.size[1] * int(upscale_settings["scale_factor"]), + ), + color=(0, 0, 0, 0), + ) + upscale_settings["target_image"] = result + + # If the custom tile definition array 'tiles' is empty, proceed with the + # default tiled upscale task by defining all the possible image tiles; note + # that the actual tile size is 'tile_size' + 'tile_overlap' and the target + # image width and height are no longer constrained to multiples of 256 but + # are instead multiples of the actual tile size + if len(upscale_settings["tiles"]) == 0: + tile_size = upscale_settings["tile_size"] + scale_factor = upscale_settings["scale_factor"] + tile_overlap = upscale_settings["tile_overlap"] + total_cols = math.ceil( + source_image.size[0] / tile_size + ) # Image width / tile size + total_rows = math.ceil( + source_image.size[1] / tile_size + ) # Image height / tile size + for y in range(0, total_rows): + y_offset = tile_overlap if y > 0 else 0 # Tile mask offset + for x in range(0, total_cols): + x_offset = tile_overlap if x > 0 else 0 # Tile mask offset + x1 = x * tile_size + y1 = y * tile_size + w = tile_size + (tile_overlap if x < total_cols - 1 else 0) + h = tile_size + (tile_overlap if y < total_rows - 1 else 0) + mask_box = ( # Default tile mask box definiton + x_offset, + y_offset, + int(w * scale_factor), + int(h * scale_factor), + ) + upscale_settings["tiles"].append( + { + "x": x1, + "y": y1, + "w": w, + "h": h, + "mask_box": mask_box, + "prompt": upscale_settings["prompt"], # Use top level prompt if available + "scale_factor": scale_factor, + } + ) + + # Generate the output image tiles + for i in range(0, len(upscale_settings["tiles"])): + generate_upscaled_tile( + config, + i, + upscale_settings, + context=context, + ) + + # Save completed upscaled image + if upscale_settings["output_format"].upper() == "JPEG": + result_rgb = result.convert("RGB") + result.close() + result = result_rgb + result.save(output_path) + result.close() + source_image.close() + return + + +def get_current_tile( + config, + context, + strength, +): + config.lcm_diffusion_setting.strength = strength + config.lcm_diffusion_setting.diffusion_task = DiffusionTask.image_to_image.value + if ( + config.lcm_diffusion_setting.use_tiny_auto_encoder + and config.lcm_diffusion_setting.use_openvino + ): + config.lcm_diffusion_setting.use_tiny_auto_encoder = False + current_tile = context.generate_text_to_image( + settings=config, + reshape=True, + device=DEVICE, + save_images=False, + save_config=False, + )[0] + return current_tile + + +# Generates a single tile from the 
source image as defined in the +# upscale_settings["tiles"] array with the corresponding index and pastes the +# generated tile into the target image using the corresponding mask and scale +# factor; note that scale factor for the target image and the individual tiles +# can be different, this function will adjust scale factors as needed +def generate_upscaled_tile( + config, + index, + upscale_settings, + context: Context = None, +): + if config == None or upscale_settings == None: + logging.error("Wrong arguments in tile creation function call!") + return + + x = upscale_settings["tiles"][index]["x"] + y = upscale_settings["tiles"][index]["y"] + w = upscale_settings["tiles"][index]["w"] + h = upscale_settings["tiles"][index]["h"] + tile_prompt = upscale_settings["tiles"][index]["prompt"] + scale_factor = upscale_settings["scale_factor"] + tile_scale_factor = upscale_settings["tiles"][index]["scale_factor"] + target_width = int(w * tile_scale_factor) + target_height = int(h * tile_scale_factor) + strength = upscale_settings["strength"] + source_image = upscale_settings["source_image"] + target_image = upscale_settings["target_image"] + mask_image = generate_tile_mask(config, index, upscale_settings) + + config.lcm_diffusion_setting.number_of_images = 1 + config.lcm_diffusion_setting.prompt = tile_prompt + config.lcm_diffusion_setting.image_width = target_width + config.lcm_diffusion_setting.image_height = target_height + config.lcm_diffusion_setting.init_image = source_image.crop((x, y, x + w, y + h)) + + current_tile = None + print(f"[SD Upscale] Generating tile {index + 1}/{len(upscale_settings['tiles'])} ") + if tile_prompt == None or tile_prompt == "": + config.lcm_diffusion_setting.prompt = "" + config.lcm_diffusion_setting.negative_prompt = "" + current_tile = get_current_tile(config, context, strength) + else: + # Attempt to use img2img with low denoising strength to + # generate the tiles with the extra aid of a prompt + # context = get_context(InterfaceType.CLI) + current_tile = get_current_tile(config, context, strength) + + if math.isclose(scale_factor, tile_scale_factor): + target_image.paste( + current_tile, (int(x * scale_factor), int(y * scale_factor)), mask_image + ) + else: + target_image.paste( + current_tile.resize((int(w * scale_factor), int(h * scale_factor))), + (int(x * scale_factor), int(y * scale_factor)), + mask_image.resize((int(w * scale_factor), int(h * scale_factor))), + ) + mask_image.close() + current_tile.close() + config.lcm_diffusion_setting.init_image.close() + + +# Generate tile mask using the box definition in the upscale_settings["tiles"] +# array with the corresponding index; note that tile masks for the default +# tiled upscale task can be reused but that would complicate the code, so +# new tile masks are instead created for each tile +def generate_tile_mask( + config, + index, + upscale_settings, +): + scale_factor = upscale_settings["scale_factor"] + tile_overlap = upscale_settings["tile_overlap"] + tile_scale_factor = upscale_settings["tiles"][index]["scale_factor"] + w = int(upscale_settings["tiles"][index]["w"] * tile_scale_factor) + h = int(upscale_settings["tiles"][index]["h"] * tile_scale_factor) + # The Stable Diffusion pipeline automatically adjusts the output size + # to multiples of 8 pixels; the mask must be created with the same + # size as the output tile + w = w - (w % 8) + h = h - (h % 8) + mask_box = upscale_settings["tiles"][index]["mask_box"] + if mask_box == None: + # Build a default solid mask with soft/transparent edges + 
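+        # The default box is inset by tile_overlap on each side; the BoxBlur applied below feathers the mask edge so overlapping tiles blend when pasted into the target image.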
mask_box = ( + tile_overlap, + tile_overlap, + w - tile_overlap, + h - tile_overlap, + ) + mask_image = Image.new(mode="RGBA", size=(w, h), color=(0, 0, 0, 0)) + mask_draw = ImageDraw.Draw(mask_image) + mask_draw.rectangle(tuple(mask_box), fill=(0, 0, 0)) + mask_blur = mask_image.filter(ImageFilter.BoxBlur(tile_overlap - 1)) + mask_image.close() + return mask_blur diff --git a/backend/upscale/upscaler.py b/backend/upscale/upscaler.py new file mode 100644 index 0000000000000000000000000000000000000000..a923dfd45f45ec59e7e1a43e81e2cc1ecdcb67e2 --- /dev/null +++ b/backend/upscale/upscaler.py @@ -0,0 +1,47 @@ +from backend.models.lcmdiffusion_setting import DiffusionTask +from backend.models.upscale import UpscaleMode +from backend.upscale.edsr_upscale_onnx import upscale_edsr_2x +from backend.upscale.tiled_upscale import generate_upscaled_image +from context import Context +from PIL import Image +from state import get_settings + + +config = get_settings() + + +def upscale_image( + context: Context, + src_image_path: str, + dst_image_path: str, + scale_factor: int = 2, + upscale_mode: UpscaleMode = UpscaleMode.normal.value, +): + if upscale_mode == UpscaleMode.normal.value: + + upscaled_img = upscale_edsr_2x(src_image_path) + upscaled_img.save(dst_image_path) + print(f"Upscaled image saved {dst_image_path}") + else: + config.settings.lcm_diffusion_setting.strength = ( + 0.3 if config.settings.lcm_diffusion_setting.use_openvino else 0.1 + ) + config.settings.lcm_diffusion_setting.diffusion_task = ( + DiffusionTask.image_to_image.value + ) + + generate_upscaled_image( + config.settings, + src_image_path, + config.settings.lcm_diffusion_setting.strength, + upscale_settings=None, + context=context, + tile_overlap=( + 32 if config.settings.lcm_diffusion_setting.use_openvino else 16 + ), + output_path=dst_image_path, + image_format=config.settings.generated_images.format, + ) + print(f"Upscaled image saved {dst_image_path}") + + return [Image.open(dst_image_path)] diff --git a/constants.py b/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..a06b070f5ba1eae97c2f88e4a32e80d5edf01a63 --- /dev/null +++ b/constants.py @@ -0,0 +1,20 @@ +from os import environ + +APP_VERSION = "v1.0.0 beta 30" +LCM_DEFAULT_MODEL = "stabilityai/sd-turbo" +LCM_DEFAULT_MODEL_OPENVINO = "rupeshs/sd-turbo-openvino" +APP_NAME = "FastSD CPU" +APP_SETTINGS_FILE = "settings.yaml" +RESULTS_DIRECTORY = "results" +CONFIG_DIRECTORY = "configs" +DEVICE = environ.get("DEVICE", "cpu") +SD_MODELS_FILE = "stable-diffusion-models.txt" +LCM_LORA_MODELS_FILE = "lcm-lora-models.txt" +OPENVINO_LCM_MODELS_FILE = "openvino-lcm-models.txt" +TAESD_MODEL = "madebyollin/taesd" +TAESDXL_MODEL = "madebyollin/taesdxl" +TAESD_MODEL_OPENVINO = "deinferno/taesd-openvino" +LCM_MODELS_FILE = "lcm-models.txt" +TAESDXL_MODEL_OPENVINO = "rupeshs/taesdxl-openvino" +LORA_DIRECTORY = "lora_models" +CONTROLNET_DIRECTORY = "controlnet_models" diff --git a/context.py b/context.py new file mode 100644 index 0000000000000000000000000000000000000000..0681f30a04322c3a0694653a0580dc980174dade --- /dev/null +++ b/context.py @@ -0,0 +1,77 @@ +from typing import Any +from app_settings import Settings +from models.interface_types import InterfaceType +from backend.models.lcmdiffusion_setting import DiffusionTask +from backend.lcm_text_to_image import LCMTextToImage +from time import perf_counter +from backend.image_saver import ImageSaver +from pprint import pprint + + +class Context: + def __init__( + self, + interface_type: InterfaceType, + 
device="cpu", + ): + self.interface_type = interface_type.value + self.lcm_text_to_image = LCMTextToImage(device) + self._latency = 0 + + @property + def latency(self): + return self._latency + + def generate_text_to_image( + self, + settings: Settings, + reshape: bool = False, + device: str = "cpu", + save_images=True, + save_config=True, + ) -> Any: + if ( + settings.lcm_diffusion_setting.use_tiny_auto_encoder + and settings.lcm_diffusion_setting.use_openvino + ): + print( + "WARNING: Tiny AutoEncoder is not supported in Image to image mode (OpenVINO)" + ) + tick = perf_counter() + from state import get_settings + + if ( + settings.lcm_diffusion_setting.diffusion_task + == DiffusionTask.text_to_image.value + ): + settings.lcm_diffusion_setting.init_image = None + + if save_config: + get_settings().save() + + pprint(settings.lcm_diffusion_setting.model_dump()) + if not settings.lcm_diffusion_setting.lcm_lora: + return None + self.lcm_text_to_image.init( + device, + settings.lcm_diffusion_setting, + ) + images = self.lcm_text_to_image.generate( + settings.lcm_diffusion_setting, + reshape, + ) + elapsed = perf_counter() - tick + + if save_images and settings.generated_images.save_image: + ImageSaver.save_images( + settings.generated_images.path, + images=images, + lcm_diffusion_setting=settings.lcm_diffusion_setting, + format=settings.generated_images.format, + ) + self._latency = elapsed + print(f"Latency : {elapsed:.2f} seconds") + if settings.lcm_diffusion_setting.controlnet: + if settings.lcm_diffusion_setting.controlnet.enabled: + images.append(settings.lcm_diffusion_setting.controlnet._control_image) + return images diff --git a/frontend/cli_interactive.py b/frontend/cli_interactive.py new file mode 100644 index 0000000000000000000000000000000000000000..46d8cec8b476dcd432d9b962898f59a7ca756ddf --- /dev/null +++ b/frontend/cli_interactive.py @@ -0,0 +1,655 @@ +from os import path +from PIL import Image +from typing import Any + +from constants import DEVICE +from paths import FastStableDiffusionPaths +from backend.upscale.upscaler import upscale_image +from backend.controlnet import controlnet_settings_from_dict +from backend.upscale.tiled_upscale import generate_upscaled_image +from frontend.webui.image_variations_ui import generate_image_variations +from backend.lora import ( + get_active_lora_weights, + update_lora_weights, + load_lora_weight, +) +from backend.models.lcmdiffusion_setting import ( + DiffusionTask, + LCMDiffusionSetting, + ControlNetSetting, +) + + +_batch_count = 1 +_edit_lora_settings = False + + +def user_value( + value_type: type, + message: str, + default_value: Any, +) -> Any: + try: + value = value_type(input(message)) + except: + value = default_value + return value + + +def interactive_mode( + config, + context, +): + print("=============================================") + print("Welcome to FastSD CPU Interactive CLI") + print("=============================================") + while True: + print("> 1. Text to Image") + print("> 2. Image to Image") + print("> 3. Image Variations") + print("> 4. EDSR Upscale") + print("> 5. SD Upscale") + print("> 6. Edit default generation settings") + print("> 7. Edit LoRA settings") + print("> 8. Edit ControlNet settings") + print("> 9. Edit negative prompt") + print("> 10. 
Quit") + option = user_value( + int, + "Enter a Diffusion Task number (1): ", + 1, + ) + if option not in range(1, 11): + print("Wrong Diffusion Task number!") + exit() + + if option == 1: + interactive_txt2img( + config, + context, + ) + elif option == 2: + interactive_img2img( + config, + context, + ) + elif option == 3: + interactive_variations( + config, + context, + ) + elif option == 4: + interactive_edsr( + config, + context, + ) + elif option == 5: + interactive_sdupscale( + config, + context, + ) + elif option == 6: + interactive_settings( + config, + context, + ) + elif option == 7: + interactive_lora( + config, + context, + True, + ) + elif option == 8: + interactive_controlnet( + config, + context, + True, + ) + elif option == 9: + interactive_negative( + config, + context, + ) + elif option == 10: + exit() + + +def interactive_negative( + config, + context, +): + settings = config.lcm_diffusion_setting + print(f"Current negative prompt: '{settings.negative_prompt}'") + user_input = input("Write a negative prompt (set guidance > 1.0): ") + if user_input == "": + return + else: + settings.negative_prompt = user_input + + +def interactive_controlnet( + config, + context, + menu_flag=False, +): + """ + @param menu_flag: Indicates whether this function was called from the main + interactive CLI menu; _True_ if called from the main menu, _False_ otherwise + """ + settings = config.lcm_diffusion_setting + if not settings.controlnet: + settings.controlnet = ControlNetSetting() + + current_enabled = settings.controlnet.enabled + current_adapter_path = settings.controlnet.adapter_path + current_conditioning_scale = settings.controlnet.conditioning_scale + current_control_image = settings.controlnet._control_image + + option = input("Enable ControlNet? (y/N): ") + settings.controlnet.enabled = True if option.upper() == "Y" else False + if settings.controlnet.enabled: + option = input( + f"Enter ControlNet adapter path ({settings.controlnet.adapter_path}): " + ) + if option != "": + settings.controlnet.adapter_path = option + settings.controlnet.conditioning_scale = user_value( + float, + f"Enter ControlNet conditioning scale ({settings.controlnet.conditioning_scale}): ", + settings.controlnet.conditioning_scale, + ) + option = input( + f"Enter ControlNet control image path (Leave empty to reuse current): " + ) + if option != "": + try: + new_image = Image.open(option) + settings.controlnet._control_image = new_image + except (AttributeError, FileNotFoundError) as e: + settings.controlnet._control_image = None + if ( + not settings.controlnet.adapter_path + or not path.exists(settings.controlnet.adapter_path) + or not settings.controlnet._control_image + ): + print("Invalid ControlNet settings! Disabling ControlNet") + settings.controlnet.enabled = False + + if ( + settings.controlnet.enabled != current_enabled + or settings.controlnet.adapter_path != current_adapter_path + ): + settings.rebuild_pipeline = True + + +def interactive_lora( + config, + context, + menu_flag=False, +): + """ + @param menu_flag: Indicates whether this function was called from the main + interactive CLI menu; _True_ if called from the main menu, _False_ otherwise + """ + if context == None or context.lcm_text_to_image.pipeline == None: + print("Diffusion pipeline not initialized, please run a generation task first!") + return + + print("> 1. Change LoRA weights") + print("> 2. 
Load new LoRA model") + option = user_value( + int, + "Enter a LoRA option (1): ", + 1, + ) + if option not in range(1, 3): + print("Wrong LoRA option!") + return + + if option == 1: + update_weights = [] + active_weights = get_active_lora_weights() + for lora in active_weights: + weight = user_value( + float, + f"Enter a new LoRA weight for {lora[0]} ({lora[1]}): ", + lora[1], + ) + update_weights.append( + ( + lora[0], + weight, + ) + ) + if len(update_weights) > 0: + update_lora_weights( + context.lcm_text_to_image.pipeline, + config.lcm_diffusion_setting, + update_weights, + ) + elif option == 2: + # Load a new LoRA + settings = config.lcm_diffusion_setting + settings.lora.fuse = False + settings.lora.enabled = False + settings.lora.path = input("Enter LoRA model path: ") + settings.lora.weight = user_value( + float, + "Enter a LoRA weight (0.5): ", + 0.5, + ) + if not path.exists(settings.lora.path): + print("Invalid LoRA model path!") + return + settings.lora.enabled = True + load_lora_weight(context.lcm_text_to_image.pipeline, settings) + + if menu_flag: + global _edit_lora_settings + _edit_lora_settings = False + option = input("Edit LoRA settings after every generation? (y/N): ") + if option.upper() == "Y": + _edit_lora_settings = True + + +def interactive_settings( + config, + context, +): + global _batch_count + settings = config.lcm_diffusion_setting + print("Enter generation settings (leave empty to use current value)") + print("> 1. Use LCM") + print("> 2. Use LCM-Lora") + print("> 3. Use OpenVINO") + option = user_value( + int, + "Select inference model option (1): ", + 1, + ) + if option not in range(1, 4): + print("Wrong inference model option! Falling back to defaults") + return + + settings.use_lcm_lora = False + settings.use_openvino = False + if option == 1: + lcm_model_id = input(f"Enter LCM model ID ({settings.lcm_model_id}): ") + if lcm_model_id != "": + settings.lcm_model_id = lcm_model_id + elif option == 2: + settings.use_lcm_lora = True + lcm_lora_id = input( + f"Enter LCM-Lora model ID ({settings.lcm_lora.lcm_lora_id}): " + ) + if lcm_lora_id != "": + settings.lcm_lora.lcm_lora_id = lcm_lora_id + base_model_id = input( + f"Enter Base model ID ({settings.lcm_lora.base_model_id}): " + ) + if base_model_id != "": + settings.lcm_lora.base_model_id = base_model_id + elif option == 3: + settings.use_openvino = True + openvino_lcm_model_id = input( + f"Enter OpenVINO model ID ({settings.openvino_lcm_model_id}): " + ) + if openvino_lcm_model_id != "": + settings.openvino_lcm_model_id = openvino_lcm_model_id + + settings.use_offline_model = True + settings.use_tiny_auto_encoder = True + option = input("Work offline? (Y/n): ") + if option.upper() == "N": + settings.use_offline_model = False + option = input("Use Tiny Auto Encoder? 
(Y/n): ") + if option.upper() == "N": + settings.use_tiny_auto_encoder = False + + settings.image_width = user_value( + int, + f"Image width ({settings.image_width}): ", + settings.image_width, + ) + settings.image_height = user_value( + int, + f"Image height ({settings.image_height}): ", + settings.image_height, + ) + settings.inference_steps = user_value( + int, + f"Inference steps ({settings.inference_steps}): ", + settings.inference_steps, + ) + settings.guidance_scale = user_value( + float, + f"Guidance scale ({settings.guidance_scale}): ", + settings.guidance_scale, + ) + settings.number_of_images = user_value( + int, + f"Number of images per batch ({settings.number_of_images}): ", + settings.number_of_images, + ) + _batch_count = user_value( + int, + f"Batch count ({_batch_count}): ", + _batch_count, + ) + # output_format = user_value(int, f"Output format (PNG)", 1) + print(config.lcm_diffusion_setting) + + +def interactive_txt2img( + config, + context, +): + global _batch_count + config.lcm_diffusion_setting.diffusion_task = DiffusionTask.text_to_image.value + user_input = input("Write a prompt (write 'exit' to quit): ") + while True: + if user_input == "exit": + return + elif user_input == "": + user_input = config.lcm_diffusion_setting.prompt + config.lcm_diffusion_setting.prompt = user_input + for i in range(0, _batch_count): + context.generate_text_to_image( + settings=config, + device=DEVICE, + ) + if _edit_lora_settings: + interactive_lora( + config, + context, + ) + user_input = input("Write a prompt: ") + + +def interactive_img2img( + config, + context, +): + global _batch_count + settings = config.lcm_diffusion_setting + settings.diffusion_task = DiffusionTask.image_to_image.value + steps = settings.inference_steps + source_path = input("Image path: ") + if source_path == "": + print("Error : You need to provide a file in img2img mode") + return + settings.strength = user_value( + float, + f"img2img strength ({settings.strength}): ", + settings.strength, + ) + settings.inference_steps = int(steps / settings.strength + 1) + user_input = input("Write a prompt (write 'exit' to quit): ") + while True: + if user_input == "exit": + settings.inference_steps = steps + return + settings.init_image = Image.open(source_path) + settings.prompt = user_input + for i in range(0, _batch_count): + context.generate_text_to_image( + settings=config, + device=DEVICE, + ) + new_path = input(f"Image path ({source_path}): ") + if new_path != "": + source_path = new_path + settings.strength = user_value( + float, + f"img2img strength ({settings.strength}): ", + settings.strength, + ) + if _edit_lora_settings: + interactive_lora( + config, + context, + ) + settings.inference_steps = int(steps / settings.strength + 1) + user_input = input("Write a prompt: ") + + +def interactive_variations( + config, + context, +): + global _batch_count + settings = config.lcm_diffusion_setting + settings.diffusion_task = DiffusionTask.image_to_image.value + steps = settings.inference_steps + source_path = input("Image path: ") + if source_path == "": + print("Error : You need to provide a file in Image variations mode") + return + settings.strength = user_value( + float, + f"Image variations strength ({settings.strength}): ", + settings.strength, + ) + settings.inference_steps = int(steps / settings.strength + 1) + while True: + settings.init_image = Image.open(source_path) + settings.prompt = "" + for i in range(0, _batch_count): + generate_image_variations( + settings.init_image, + settings.strength, + ) + if 
_edit_lora_settings: + interactive_lora( + config, + context, + ) + user_input = input("Continue in Image variations mode? (Y/n): ") + if user_input.upper() == "N": + settings.inference_steps = steps + return + new_path = input(f"Image path ({source_path}): ") + if new_path != "": + source_path = new_path + settings.strength = user_value( + float, + f"Image variations strength ({settings.strength}): ", + settings.strength, + ) + settings.inference_steps = int(steps / settings.strength + 1) + + +def interactive_edsr( + config, + context, +): + source_path = input("Image path: ") + if source_path == "": + print("Error : You need to provide a file in EDSR mode") + return + while True: + output_path = FastStableDiffusionPaths.get_upscale_filepath( + source_path, + 2, + config.generated_images.format, + ) + result = upscale_image( + context, + source_path, + output_path, + 2, + ) + user_input = input("Continue in EDSR upscale mode? (Y/n): ") + if user_input.upper() == "N": + return + new_path = input(f"Image path ({source_path}): ") + if new_path != "": + source_path = new_path + + +def interactive_sdupscale_settings(config): + steps = config.lcm_diffusion_setting.inference_steps + custom_settings = {} + print("> 1. Upscale whole image") + print("> 2. Define custom tiles (advanced)") + option = user_value( + int, + "Select an SD Upscale option (1): ", + 1, + ) + if option not in range(1, 3): + print("Wrong SD Upscale option!") + return + + # custom_settings["source_file"] = args.file + custom_settings["source_file"] = "" + new_path = input(f"Input image path ({custom_settings['source_file']}): ") + if new_path != "": + custom_settings["source_file"] = new_path + if custom_settings["source_file"] == "": + print("Error : You need to provide a file in SD Upscale mode") + return + custom_settings["target_file"] = None + if option == 2: + custom_settings["target_file"] = input("Image to patch: ") + if custom_settings["target_file"] == "": + print("No target file provided, upscaling whole input image instead!") + custom_settings["target_file"] = None + option = 1 + custom_settings["output_format"] = config.generated_images.format + custom_settings["strength"] = user_value( + float, + f"SD Upscale strength ({config.lcm_diffusion_setting.strength}): ", + config.lcm_diffusion_setting.strength, + ) + config.lcm_diffusion_setting.inference_steps = int( + steps / custom_settings["strength"] + 1 + ) + if option == 1: + custom_settings["scale_factor"] = user_value( + float, + f"Scale factor (2.0): ", + 2.0, + ) + custom_settings["tile_size"] = user_value( + int, + f"Split input image into tiles of the following size, in pixels (256): ", + 256, + ) + custom_settings["tile_overlap"] = user_value( + int, + f"Tile overlap, in pixels (16): ", + 16, + ) + elif option == 2: + custom_settings["scale_factor"] = user_value( + float, + "Input image to Image-to-patch scale_factor (2.0): ", + 2.0, + ) + custom_settings["tile_size"] = 256 + custom_settings["tile_overlap"] = 16 + custom_settings["prompt"] = input( + "Write a prompt describing the input image (optional): " + ) + custom_settings["tiles"] = [] + if option == 2: + add_tile = True + while add_tile: + print("=== Define custom SD Upscale tile ===") + tile_x = user_value( + int, + "Enter tile's X position: ", + 0, + ) + tile_y = user_value( + int, + "Enter tile's Y position: ", + 0, + ) + tile_w = user_value( + int, + "Enter tile's width (256): ", + 256, + ) + tile_h = user_value( + int, + "Enter tile's height (256): ", + 256, + ) + tile_scale = user_value( + 
float, + "Enter tile's scale factor (2.0): ", + 2.0, + ) + tile_prompt = input("Enter tile's prompt (optional): ") + custom_settings["tiles"].append( + { + "x": tile_x, + "y": tile_y, + "w": tile_w, + "h": tile_h, + "mask_box": None, + "prompt": tile_prompt, + "scale_factor": tile_scale, + } + ) + tile_option = input("Do you want to define another tile? (y/N): ") + if tile_option == "" or tile_option.upper() == "N": + add_tile = False + + return custom_settings + + +def interactive_sdupscale( + config, + context, +): + settings = config.lcm_diffusion_setting + settings.diffusion_task = DiffusionTask.image_to_image.value + settings.init_image = "" + source_path = "" + steps = settings.inference_steps + + while True: + custom_upscale_settings = None + option = input("Edit custom SD Upscale settings? (y/N): ") + if option.upper() == "Y": + config.lcm_diffusion_setting.inference_steps = steps + custom_upscale_settings = interactive_sdupscale_settings(config) + if not custom_upscale_settings: + return + source_path = custom_upscale_settings["source_file"] + else: + new_path = input(f"Image path ({source_path}): ") + if new_path != "": + source_path = new_path + if source_path == "": + print("Error : You need to provide a file in SD Upscale mode") + return + settings.strength = user_value( + float, + f"SD Upscale strength ({settings.strength}): ", + settings.strength, + ) + settings.inference_steps = int(steps / settings.strength + 1) + + output_path = FastStableDiffusionPaths.get_upscale_filepath( + source_path, + 2, + config.generated_images.format, + ) + generate_upscaled_image( + config, + source_path, + settings.strength, + upscale_settings=custom_upscale_settings, + context=context, + tile_overlap=32 if settings.use_openvino else 16, + output_path=output_path, + image_format=config.generated_images.format, + ) + user_input = input("Continue in SD Upscale mode? 
(Y/n): ") + if user_input.upper() == "N": + settings.inference_steps = steps + return diff --git a/frontend/gui/app_window.py b/frontend/gui/app_window.py new file mode 100644 index 0000000000000000000000000000000000000000..83a53e2cb78d8b8546156e7e536ab64190fdd78d --- /dev/null +++ b/frontend/gui/app_window.py @@ -0,0 +1,612 @@ +from PyQt5.QtWidgets import ( + QWidget, + QPushButton, + QHBoxLayout, + QVBoxLayout, + QLabel, + QLineEdit, + QMainWindow, + QSlider, + QTabWidget, + QSpacerItem, + QSizePolicy, + QComboBox, + QCheckBox, + QTextEdit, + QToolButton, + QFileDialog, +) +from PyQt5 import QtWidgets, QtCore +from PyQt5.QtGui import QPixmap, QDesktopServices +from PyQt5.QtCore import QSize, QThreadPool, Qt, QUrl + +from PIL.ImageQt import ImageQt +from constants import ( + LCM_DEFAULT_MODEL, + LCM_DEFAULT_MODEL_OPENVINO, + APP_NAME, + APP_VERSION, +) +from frontend.gui.image_generator_worker import ImageGeneratorWorker +from app_settings import AppSettings +from paths import FastStableDiffusionPaths +from frontend.utils import is_reshape_required +from context import Context +from models.interface_types import InterfaceType +from constants import DEVICE +from frontend.utils import enable_openvino_controls, get_valid_model_id +from backend.models.lcmdiffusion_setting import DiffusionTask + +# DPI scale fix +QtWidgets.QApplication.setAttribute(QtCore.Qt.AA_EnableHighDpiScaling, True) +QtWidgets.QApplication.setAttribute(QtCore.Qt.AA_UseHighDpiPixmaps, True) + + +class MainWindow(QMainWindow): + def __init__(self, config: AppSettings): + super().__init__() + self.config = config + # Prevent saved LoRA and ControlNet settings from being used by + # default; in GUI mode, the user must explicitly enable those + if self.config.settings.lcm_diffusion_setting.lora: + self.config.settings.lcm_diffusion_setting.lora.enabled = False + if self.config.settings.lcm_diffusion_setting.controlnet: + self.config.settings.lcm_diffusion_setting.controlnet.enabled = False + self.setWindowTitle(APP_NAME) + self.setFixedSize(QSize(600, 670)) + self.init_ui() + self.pipeline = None + self.threadpool = QThreadPool() + self.device = "cpu" + self.previous_width = 0 + self.previous_height = 0 + self.previous_model = "" + self.previous_num_of_images = 0 + self.context = Context(InterfaceType.GUI) + self.init_ui_values() + self.gen_images = [] + self.image_index = 0 + print(f"Output path : { self.config.settings.generated_images.path}") + + def init_ui_values(self): + self.lcm_model.setEnabled( + not self.config.settings.lcm_diffusion_setting.use_openvino + ) + self.guidance.setValue( + int(self.config.settings.lcm_diffusion_setting.guidance_scale * 10) + ) + self.seed_value.setEnabled(self.config.settings.lcm_diffusion_setting.use_seed) + self.safety_checker.setChecked( + self.config.settings.lcm_diffusion_setting.use_safety_checker + ) + self.use_openvino_check.setChecked( + self.config.settings.lcm_diffusion_setting.use_openvino + ) + self.width.setCurrentText( + str(self.config.settings.lcm_diffusion_setting.image_width) + ) + self.height.setCurrentText( + str(self.config.settings.lcm_diffusion_setting.image_height) + ) + self.inference_steps.setValue( + int(self.config.settings.lcm_diffusion_setting.inference_steps) + ) + self.seed_check.setChecked(self.config.settings.lcm_diffusion_setting.use_seed) + self.seed_value.setText(str(self.config.settings.lcm_diffusion_setting.seed)) + self.use_local_model_folder.setChecked( + self.config.settings.lcm_diffusion_setting.use_offline_model + ) + 
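+        # The remaining widgets are populated from the persisted settings so the GUI reopens with the last-used values.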
self.results_path.setText(self.config.settings.generated_images.path) + self.num_images.setValue( + self.config.settings.lcm_diffusion_setting.number_of_images + ) + self.use_tae_sd.setChecked( + self.config.settings.lcm_diffusion_setting.use_tiny_auto_encoder + ) + self.use_lcm_lora.setChecked( + self.config.settings.lcm_diffusion_setting.use_lcm_lora + ) + self.lcm_model.setCurrentText( + get_valid_model_id( + self.config.lcm_models, + self.config.settings.lcm_diffusion_setting.lcm_model_id, + LCM_DEFAULT_MODEL, + ) + ) + self.base_model_id.setCurrentText( + get_valid_model_id( + self.config.stable_diffsuion_models, + self.config.settings.lcm_diffusion_setting.lcm_lora.base_model_id, + ) + ) + self.lcm_lora_id.setCurrentText( + get_valid_model_id( + self.config.lcm_lora_models, + self.config.settings.lcm_diffusion_setting.lcm_lora.lcm_lora_id, + ) + ) + self.openvino_lcm_model_id.setCurrentText( + get_valid_model_id( + self.config.openvino_lcm_models, + self.config.settings.lcm_diffusion_setting.openvino_lcm_model_id, + LCM_DEFAULT_MODEL_OPENVINO, + ) + ) + self.neg_prompt.setEnabled( + self.config.settings.lcm_diffusion_setting.use_lcm_lora + or self.config.settings.lcm_diffusion_setting.use_openvino + ) + self.openvino_lcm_model_id.setEnabled( + self.config.settings.lcm_diffusion_setting.use_openvino + ) + + def init_ui(self): + self.create_main_tab() + self.create_settings_tab() + self.create_about_tab() + self.show() + + def create_main_tab(self): + self.img = QLabel("<>") + self.img.setAlignment(Qt.AlignCenter) + self.img.setFixedSize(QSize(512, 512)) + self.vspacer = QSpacerItem(20, 40, QSizePolicy.Minimum, QSizePolicy.Expanding) + + self.prompt = QTextEdit() + self.prompt.setPlaceholderText("A fantasy landscape") + self.prompt.setAcceptRichText(False) + self.neg_prompt = QTextEdit() + self.neg_prompt.setPlaceholderText("") + self.neg_prompt.setAcceptRichText(False) + self.neg_prompt_label = QLabel("Negative prompt (Set guidance scale > 1.0):") + self.generate = QPushButton("Generate") + self.generate.clicked.connect(self.text_to_image) + self.prompt.setFixedHeight(40) + self.neg_prompt.setFixedHeight(35) + self.browse_results = QPushButton("...") + self.browse_results.setFixedWidth(30) + self.browse_results.clicked.connect(self.on_open_results_folder) + self.browse_results.setToolTip("Open output folder") + + hlayout = QHBoxLayout() + hlayout.addWidget(self.neg_prompt) + hlayout.addWidget(self.generate) + hlayout.addWidget(self.browse_results) + + self.previous_img_btn = QToolButton() + self.previous_img_btn.setText("<") + self.previous_img_btn.clicked.connect(self.on_show_previous_image) + self.next_img_btn = QToolButton() + self.next_img_btn.setText(">") + self.next_img_btn.clicked.connect(self.on_show_next_image) + hlayout_nav = QHBoxLayout() + hlayout_nav.addWidget(self.previous_img_btn) + hlayout_nav.addWidget(self.img) + hlayout_nav.addWidget(self.next_img_btn) + + vlayout = QVBoxLayout() + vlayout.addLayout(hlayout_nav) + vlayout.addItem(self.vspacer) + vlayout.addWidget(self.prompt) + vlayout.addWidget(self.neg_prompt_label) + vlayout.addLayout(hlayout) + + self.tab_widget = QTabWidget(self) + self.tab_main = QWidget() + self.tab_settings = QWidget() + self.tab_about = QWidget() + self.tab_main.setLayout(vlayout) + + self.tab_widget.addTab(self.tab_main, "Text to Image") + self.tab_widget.addTab(self.tab_settings, "Settings") + self.tab_widget.addTab(self.tab_about, "About") + + self.setCentralWidget(self.tab_widget) + self.use_seed = False + + def 
create_settings_tab(self): + self.lcm_model_label = QLabel("Latent Consistency Model:") + # self.lcm_model = QLineEdit(LCM_DEFAULT_MODEL) + self.lcm_model = QComboBox(self) + self.lcm_model.addItems(self.config.lcm_models) + self.lcm_model.currentIndexChanged.connect(self.on_lcm_model_changed) + + self.use_lcm_lora = QCheckBox("Use LCM LoRA") + self.use_lcm_lora.setChecked(False) + self.use_lcm_lora.stateChanged.connect(self.use_lcm_lora_changed) + + self.lora_base_model_id_label = QLabel("Lora base model ID :") + self.base_model_id = QComboBox(self) + self.base_model_id.addItems(self.config.stable_diffsuion_models) + self.base_model_id.currentIndexChanged.connect(self.on_base_model_id_changed) + + self.lcm_lora_model_id_label = QLabel("LCM LoRA model ID :") + self.lcm_lora_id = QComboBox(self) + self.lcm_lora_id.addItems(self.config.lcm_lora_models) + self.lcm_lora_id.currentIndexChanged.connect(self.on_lcm_lora_id_changed) + + self.inference_steps_value = QLabel("Number of inference steps: 4") + self.inference_steps = QSlider(orientation=Qt.Orientation.Horizontal) + self.inference_steps.setMaximum(25) + self.inference_steps.setMinimum(1) + self.inference_steps.setValue(4) + self.inference_steps.valueChanged.connect(self.update_steps_label) + + self.num_images_value = QLabel("Number of images: 1") + self.num_images = QSlider(orientation=Qt.Orientation.Horizontal) + self.num_images.setMaximum(100) + self.num_images.setMinimum(1) + self.num_images.setValue(1) + self.num_images.valueChanged.connect(self.update_num_images_label) + + self.guidance_value = QLabel("Guidance scale: 1") + self.guidance = QSlider(orientation=Qt.Orientation.Horizontal) + self.guidance.setMaximum(20) + self.guidance.setMinimum(10) + self.guidance.setValue(10) + self.guidance.valueChanged.connect(self.update_guidance_label) + + self.width_value = QLabel("Width :") + self.width = QComboBox(self) + self.width.addItem("256") + self.width.addItem("512") + self.width.addItem("768") + self.width.addItem("1024") + self.width.setCurrentText("512") + self.width.currentIndexChanged.connect(self.on_width_changed) + + self.height_value = QLabel("Height :") + self.height = QComboBox(self) + self.height.addItem("256") + self.height.addItem("512") + self.height.addItem("768") + self.height.addItem("1024") + self.height.setCurrentText("512") + self.height.currentIndexChanged.connect(self.on_height_changed) + + self.seed_check = QCheckBox("Use seed") + self.seed_value = QLineEdit() + self.seed_value.setInputMask("9999999999") + self.seed_value.setText("123123") + self.seed_check.stateChanged.connect(self.seed_changed) + + self.safety_checker = QCheckBox("Use safety checker") + self.safety_checker.setChecked(True) + self.safety_checker.stateChanged.connect(self.use_safety_checker_changed) + + self.use_openvino_check = QCheckBox("Use OpenVINO") + self.use_openvino_check.setChecked(False) + self.openvino_model_label = QLabel("OpenVINO LCM model:") + self.use_local_model_folder = QCheckBox( + "Use locally cached model or downloaded model folder(offline)" + ) + self.openvino_lcm_model_id = QComboBox(self) + self.openvino_lcm_model_id.addItems(self.config.openvino_lcm_models) + self.openvino_lcm_model_id.currentIndexChanged.connect( + self.on_openvino_lcm_model_id_changed + ) + + self.use_openvino_check.setEnabled(enable_openvino_controls()) + self.use_local_model_folder.setChecked(False) + self.use_local_model_folder.stateChanged.connect(self.use_offline_model_changed) + 
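+        # Toggling the OpenVINO checkbox swaps which model selectors are enabled; see use_openvino_changed below.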
self.use_openvino_check.stateChanged.connect(self.use_openvino_changed) + + self.use_tae_sd = QCheckBox( + "Use Tiny Auto Encoder - TAESD (Fast, moderate quality)" + ) + self.use_tae_sd.setChecked(False) + self.use_tae_sd.stateChanged.connect(self.use_tae_sd_changed) + + hlayout = QHBoxLayout() + hlayout.addWidget(self.seed_check) + hlayout.addWidget(self.seed_value) + hspacer = QSpacerItem(20, 10, QSizePolicy.Expanding, QSizePolicy.Minimum) + slider_hspacer = QSpacerItem(20, 10, QSizePolicy.Expanding, QSizePolicy.Minimum) + + self.results_path_label = QLabel("Output path:") + self.results_path = QLineEdit() + self.results_path.textChanged.connect(self.on_path_changed) + self.browse_folder_btn = QToolButton() + self.browse_folder_btn.setText("...") + self.browse_folder_btn.clicked.connect(self.on_browse_folder) + + self.reset = QPushButton("Reset All") + self.reset.clicked.connect(self.reset_all_settings) + + vlayout = QVBoxLayout() + vspacer = QSpacerItem(20, 20, QSizePolicy.Minimum, QSizePolicy.Expanding) + vlayout.addItem(hspacer) + vlayout.setSpacing(3) + vlayout.addWidget(self.lcm_model_label) + vlayout.addWidget(self.lcm_model) + vlayout.addWidget(self.use_local_model_folder) + vlayout.addWidget(self.use_lcm_lora) + vlayout.addWidget(self.lora_base_model_id_label) + vlayout.addWidget(self.base_model_id) + vlayout.addWidget(self.lcm_lora_model_id_label) + vlayout.addWidget(self.lcm_lora_id) + vlayout.addWidget(self.use_openvino_check) + vlayout.addWidget(self.openvino_model_label) + vlayout.addWidget(self.openvino_lcm_model_id) + vlayout.addWidget(self.use_tae_sd) + vlayout.addItem(slider_hspacer) + vlayout.addWidget(self.inference_steps_value) + vlayout.addWidget(self.inference_steps) + vlayout.addWidget(self.num_images_value) + vlayout.addWidget(self.num_images) + vlayout.addWidget(self.width_value) + vlayout.addWidget(self.width) + vlayout.addWidget(self.height_value) + vlayout.addWidget(self.height) + vlayout.addWidget(self.guidance_value) + vlayout.addWidget(self.guidance) + vlayout.addLayout(hlayout) + vlayout.addWidget(self.safety_checker) + + vlayout.addWidget(self.results_path_label) + hlayout_path = QHBoxLayout() + hlayout_path.addWidget(self.results_path) + hlayout_path.addWidget(self.browse_folder_btn) + vlayout.addLayout(hlayout_path) + self.tab_settings.setLayout(vlayout) + hlayout_reset = QHBoxLayout() + hspacer = QSpacerItem(20, 20, QSizePolicy.Expanding, QSizePolicy.Minimum) + hlayout_reset.addItem(hspacer) + hlayout_reset.addWidget(self.reset) + vlayout.addLayout(hlayout_reset) + vlayout.addItem(vspacer) + + def create_about_tab(self): + self.label = QLabel() + self.label.setAlignment(Qt.AlignCenter) + self.label.setText( + f"""

FastSD CPU {APP_VERSION}
+ (c)2023 - 2024 Rupesh Sreeraman
+ Faster stable diffusion on CPU
+ Based on Latent Consistency Models
+ GitHub : https://github.com/rupeshs/fastsdcpu/
""" + ) + + vlayout = QVBoxLayout() + vlayout.addWidget(self.label) + self.tab_about.setLayout(vlayout) + + def show_image(self, pixmap): + image_width = self.config.settings.lcm_diffusion_setting.image_width + image_height = self.config.settings.lcm_diffusion_setting.image_height + if image_width > 512 or image_height > 512: + new_width = 512 if image_width > 512 else image_width + new_height = 512 if image_height > 512 else image_height + self.img.setPixmap( + pixmap.scaled( + new_width, + new_height, + Qt.KeepAspectRatio, + ) + ) + else: + self.img.setPixmap(pixmap) + + def on_show_next_image(self): + if self.image_index != len(self.gen_images) - 1 and len(self.gen_images) > 0: + self.previous_img_btn.setEnabled(True) + self.image_index += 1 + self.show_image(self.gen_images[self.image_index]) + if self.image_index == len(self.gen_images) - 1: + self.next_img_btn.setEnabled(False) + + def on_open_results_folder(self): + QDesktopServices.openUrl( + QUrl.fromLocalFile(self.config.settings.generated_images.path) + ) + + def on_show_previous_image(self): + if self.image_index != 0: + self.next_img_btn.setEnabled(True) + self.image_index -= 1 + self.show_image(self.gen_images[self.image_index]) + if self.image_index == 0: + self.previous_img_btn.setEnabled(False) + + def on_path_changed(self, text): + self.config.settings.generated_images.path = text + + def on_browse_folder(self): + options = QFileDialog.Options() + options |= QFileDialog.ShowDirsOnly + + folder_path = QFileDialog.getExistingDirectory( + self, "Select a Folder", "", options=options + ) + + if folder_path: + self.config.settings.generated_images.path = folder_path + self.results_path.setText(folder_path) + + def on_width_changed(self, index): + width_txt = self.width.itemText(index) + self.config.settings.lcm_diffusion_setting.image_width = int(width_txt) + + def on_height_changed(self, index): + height_txt = self.height.itemText(index) + self.config.settings.lcm_diffusion_setting.image_height = int(height_txt) + + def on_lcm_model_changed(self, index): + model_id = self.lcm_model.itemText(index) + self.config.settings.lcm_diffusion_setting.lcm_model_id = model_id + + def on_base_model_id_changed(self, index): + model_id = self.base_model_id.itemText(index) + self.config.settings.lcm_diffusion_setting.lcm_lora.base_model_id = model_id + + def on_lcm_lora_id_changed(self, index): + model_id = self.lcm_lora_id.itemText(index) + self.config.settings.lcm_diffusion_setting.lcm_lora.lcm_lora_id = model_id + + def on_openvino_lcm_model_id_changed(self, index): + model_id = self.openvino_lcm_model_id.itemText(index) + self.config.settings.lcm_diffusion_setting.openvino_lcm_model_id = model_id + + def use_openvino_changed(self, state): + if state == 2: + self.lcm_model.setEnabled(False) + self.use_lcm_lora.setEnabled(False) + self.lcm_lora_id.setEnabled(False) + self.base_model_id.setEnabled(False) + self.neg_prompt.setEnabled(True) + self.openvino_lcm_model_id.setEnabled(True) + self.config.settings.lcm_diffusion_setting.use_openvino = True + else: + self.lcm_model.setEnabled(True) + self.use_lcm_lora.setEnabled(True) + self.lcm_lora_id.setEnabled(True) + self.base_model_id.setEnabled(True) + self.neg_prompt.setEnabled(False) + self.openvino_lcm_model_id.setEnabled(False) + self.config.settings.lcm_diffusion_setting.use_openvino = False + + def use_tae_sd_changed(self, state): + if state == 2: + self.config.settings.lcm_diffusion_setting.use_tiny_auto_encoder = True + else: + 
self.config.settings.lcm_diffusion_setting.use_tiny_auto_encoder = False + + def use_offline_model_changed(self, state): + if state == 2: + self.config.settings.lcm_diffusion_setting.use_offline_model = True + else: + self.config.settings.lcm_diffusion_setting.use_offline_model = False + + def use_lcm_lora_changed(self, state): + if state == 2: + self.lcm_model.setEnabled(False) + self.lcm_lora_id.setEnabled(True) + self.base_model_id.setEnabled(True) + self.neg_prompt.setEnabled(True) + self.config.settings.lcm_diffusion_setting.use_lcm_lora = True + else: + self.lcm_model.setEnabled(True) + self.lcm_lora_id.setEnabled(False) + self.base_model_id.setEnabled(False) + self.neg_prompt.setEnabled(False) + self.config.settings.lcm_diffusion_setting.use_lcm_lora = False + + def use_safety_checker_changed(self, state): + if state == 2: + self.config.settings.lcm_diffusion_setting.use_safety_checker = True + else: + self.config.settings.lcm_diffusion_setting.use_safety_checker = False + + def update_steps_label(self, value): + self.inference_steps_value.setText(f"Number of inference steps: {value}") + self.config.settings.lcm_diffusion_setting.inference_steps = value + + def update_num_images_label(self, value): + self.num_images_value.setText(f"Number of images: {value}") + self.config.settings.lcm_diffusion_setting.number_of_images = value + + def update_guidance_label(self, value): + val = round(int(value) / 10, 1) + self.guidance_value.setText(f"Guidance scale: {val}") + self.config.settings.lcm_diffusion_setting.guidance_scale = val + + def seed_changed(self, state): + if state == 2: + self.seed_value.setEnabled(True) + self.config.settings.lcm_diffusion_setting.use_seed = True + else: + self.seed_value.setEnabled(False) + self.config.settings.lcm_diffusion_setting.use_seed = False + + def get_seed_value(self) -> int: + use_seed = self.config.settings.lcm_diffusion_setting.use_seed + seed_value = int(self.seed_value.text()) if use_seed else -1 + return seed_value + + def generate_image(self): + self.config.settings.lcm_diffusion_setting.seed = self.get_seed_value() + self.config.settings.lcm_diffusion_setting.prompt = self.prompt.toPlainText() + self.config.settings.lcm_diffusion_setting.negative_prompt = ( + self.neg_prompt.toPlainText() + ) + self.config.settings.lcm_diffusion_setting.lcm_lora.lcm_lora_id = ( + self.lcm_lora_id.currentText() + ) + self.config.settings.lcm_diffusion_setting.lcm_lora.base_model_id = ( + self.base_model_id.currentText() + ) + + if self.config.settings.lcm_diffusion_setting.use_openvino: + model_id = self.openvino_lcm_model_id.currentText() + self.config.settings.lcm_diffusion_setting.openvino_lcm_model_id = model_id + else: + model_id = self.lcm_model.currentText() + self.config.settings.lcm_diffusion_setting.lcm_model_id = model_id + + reshape_required = False + if self.config.settings.lcm_diffusion_setting.use_openvino: + # Detect dimension change + reshape_required = is_reshape_required( + self.previous_width, + self.config.settings.lcm_diffusion_setting.image_width, + self.previous_height, + self.config.settings.lcm_diffusion_setting.image_height, + self.previous_model, + model_id, + self.previous_num_of_images, + self.config.settings.lcm_diffusion_setting.number_of_images, + ) + self.config.settings.lcm_diffusion_setting.diffusion_task = ( + DiffusionTask.text_to_image.value + ) + images = self.context.generate_text_to_image( + self.config.settings, + reshape_required, + DEVICE, + ) + self.image_index = 0 + self.gen_images = [] + for img in images: + im 
= ImageQt(img).copy() + pixmap = QPixmap.fromImage(im) + self.gen_images.append(pixmap) + + if len(self.gen_images) > 1: + self.next_img_btn.setEnabled(True) + self.previous_img_btn.setEnabled(False) + else: + self.next_img_btn.setEnabled(False) + self.previous_img_btn.setEnabled(False) + + self.show_image(self.gen_images[0]) + + self.previous_width = self.config.settings.lcm_diffusion_setting.image_width + self.previous_height = self.config.settings.lcm_diffusion_setting.image_height + self.previous_model = model_id + self.previous_num_of_images = ( + self.config.settings.lcm_diffusion_setting.number_of_images + ) + + def text_to_image(self): + self.img.setText("Please wait...") + worker = ImageGeneratorWorker(self.generate_image) + self.threadpool.start(worker) + + def closeEvent(self, event): + self.config.settings.lcm_diffusion_setting.seed = self.get_seed_value() + print(self.config.settings.lcm_diffusion_setting) + print("Saving settings") + self.config.save() + + def reset_all_settings(self): + self.use_local_model_folder.setChecked(False) + self.width.setCurrentText("512") + self.height.setCurrentText("512") + self.inference_steps.setValue(4) + self.guidance.setValue(10) + self.use_openvino_check.setChecked(False) + self.seed_check.setChecked(False) + self.safety_checker.setChecked(False) + self.results_path.setText(FastStableDiffusionPaths().get_results_path()) + self.use_tae_sd.setChecked(False) + self.use_lcm_lora.setChecked(False) diff --git a/frontend/gui/image_generator_worker.py b/frontend/gui/image_generator_worker.py new file mode 100644 index 0000000000000000000000000000000000000000..3a948365085ece82337309ac91d278e77fa03e40 --- /dev/null +++ b/frontend/gui/image_generator_worker.py @@ -0,0 +1,37 @@ +from PyQt5.QtCore import ( + pyqtSlot, + QRunnable, + pyqtSignal, + pyqtSlot, +) +from PyQt5.QtCore import QObject +import traceback +import sys + + +class WorkerSignals(QObject): + finished = pyqtSignal() + error = pyqtSignal(tuple) + result = pyqtSignal(object) + + +class ImageGeneratorWorker(QRunnable): + def __init__(self, fn, *args, **kwargs): + super(ImageGeneratorWorker, self).__init__() + self.fn = fn + self.args = args + self.kwargs = kwargs + self.signals = WorkerSignals() + + @pyqtSlot() + def run(self): + try: + result = self.fn(*self.args, **self.kwargs) + except: + traceback.print_exc() + exctype, value = sys.exc_info()[:2] + self.signals.error.emit((exctype, value, traceback.format_exc())) + else: + self.signals.result.emit(result) + finally: + self.signals.finished.emit() diff --git a/frontend/gui/ui.py b/frontend/gui/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..9250bf676da1f3dc8a2f5435095b9cec9b08041e --- /dev/null +++ b/frontend/gui/ui.py @@ -0,0 +1,15 @@ +from typing import List +from frontend.gui.app_window import MainWindow +from PyQt5.QtWidgets import QApplication +import sys +from app_settings import AppSettings + + +def start_gui( + argv: List[str], + app_settings: AppSettings, +): + app = QApplication(sys.argv) + window = MainWindow(app_settings) + window.show() + app.exec() diff --git a/frontend/utils.py b/frontend/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0d70defdaff21ffa6ae7ab70378bc82f79abbaf8 --- /dev/null +++ b/frontend/utils.py @@ -0,0 +1,83 @@ +import platform +from os import path +from typing import List + +from backend.device import is_openvino_device +from constants import DEVICE +from paths import get_file_name + + +def is_reshape_required( + prev_width: int, + cur_width: 
int, + prev_height: int, + cur_height: int, + prev_model: int, + cur_model: int, + prev_num_of_images: int, + cur_num_of_images: int, +) -> bool: + reshape_required = False + if ( + prev_width != cur_width + or prev_height != cur_height + or prev_model != cur_model + or prev_num_of_images != cur_num_of_images + ): + print("Reshape and compile") + reshape_required = True + + return reshape_required + + +def enable_openvino_controls() -> bool: + return is_openvino_device() and platform.system().lower() != "darwin" and platform.processor().lower() != 'arm' + + + +def get_valid_model_id( + models: List, + model_id: str, + default_model: str = "", +) -> str: + if len(models) == 0: + print("Error: model configuration file is empty,please add some models.") + return "" + if model_id == "": + if default_model: + return default_model + else: + return models[0] + + if model_id in models: + return model_id + else: + print( + f"Error:{model_id} Model not found in configuration file,so using first model : {models[0]}" + ) + return models[0] + + +def get_valid_lora_model( + models: List, + cur_model: str, + lora_models_dir: str, +) -> str: + if cur_model == "" or cur_model is None: + print( + f"No lora models found, please add lora models to {lora_models_dir} directory" + ) + return "" + else: + if path.exists(cur_model): + return get_file_name(cur_model) + else: + print(f"Lora model {cur_model} not found") + if len(models) > 0: + print(f"Fallback model - {models[0]}") + return get_file_name(models[0]) + else: + print( + f"No lora models found, please add lora models to {lora_models_dir} directory" + ) + return "" diff --git a/frontend/webui/controlnet_ui.py b/frontend/webui/controlnet_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..5bfab4acb318324a97a9f8191ea96feca80e5cd8 --- /dev/null +++ b/frontend/webui/controlnet_ui.py @@ -0,0 +1,194 @@ +import gradio as gr +from PIL import Image +from backend.lora import get_lora_models +from state import get_settings +from backend.models.lcmdiffusion_setting import ControlNetSetting +from backend.annotators.image_control_factory import ImageControlFactory + +_controlnet_models_map = None +_controlnet_enabled = False +_adapter_path = None + +app_settings = get_settings() + + +def on_user_input( + enable: bool, + adapter_name: str, + conditioning_scale: float, + control_image: Image, + preprocessor: str, +): + if not isinstance(adapter_name, str): + gr.Warning("Please select a valid ControlNet model") + return gr.Checkbox(value=False) + + settings = app_settings.settings.lcm_diffusion_setting + if settings.controlnet is None: + settings.controlnet = ControlNetSetting() + + if enable and (adapter_name is None or adapter_name == ""): + gr.Warning("Please select a valid ControlNet adapter") + return gr.Checkbox(value=False) + elif enable and not control_image: + gr.Warning("Please provide a ControlNet control image") + return gr.Checkbox(value=False) + + if control_image is None: + return gr.Checkbox(value=enable) + + if preprocessor == "None": + processed_control_image = control_image + else: + image_control_factory = ImageControlFactory() + control = image_control_factory.create_control(preprocessor) + processed_control_image = control.get_control_image(control_image) + + if not enable: + settings.controlnet.enabled = False + else: + settings.controlnet.enabled = True + settings.controlnet.adapter_path = _controlnet_models_map[adapter_name] + settings.controlnet.conditioning_scale = float(conditioning_scale) + 
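+        # _control_image stores the preprocessed image (or the raw control image when the preprocessor is "None").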
settings.controlnet._control_image = processed_control_image + + # This code can be improved; currently, if the user clicks the + # "Enable ControlNet" checkbox or changes the currently selected + # ControlNet model, it will trigger a pipeline rebuild even if, in + # the end, the user leaves the same ControlNet settings + global _controlnet_enabled + global _adapter_path + if settings.controlnet.enabled != _controlnet_enabled or ( + settings.controlnet.enabled + and settings.controlnet.adapter_path != _adapter_path + ): + settings.rebuild_pipeline = True + _controlnet_enabled = settings.controlnet.enabled + _adapter_path = settings.controlnet.adapter_path + return gr.Checkbox(value=enable) + + +def on_change_conditioning_scale(cond_scale): + print(cond_scale) + app_settings.settings.lcm_diffusion_setting.controlnet.conditioning_scale = ( + cond_scale + ) + + +def get_controlnet_ui() -> None: + with gr.Blocks() as ui: + gr.HTML( + 'Download ControlNet v1.1 model from ControlNet v1.1 (723 MB files) and place it in controlnet_models folder,restart the app' + ) + with gr.Row(): + with gr.Column(): + with gr.Row(): + global _controlnet_models_map + _controlnet_models_map = get_lora_models( + app_settings.settings.lcm_diffusion_setting.dirs["controlnet"] + ) + controlnet_models = list(_controlnet_models_map.keys()) + default_model = ( + controlnet_models[0] if len(controlnet_models) else None + ) + + enabled_checkbox = gr.Checkbox( + label="Enable ControlNet", + info="Enable ControlNet", + show_label=True, + ) + model_dropdown = gr.Dropdown( + _controlnet_models_map.keys(), + label="ControlNet model", + info="ControlNet model to load (.safetensors format)", + value=default_model, + interactive=True, + ) + conditioning_scale_slider = gr.Slider( + 0.0, + 1.0, + value=0.5, + step=0.05, + label="ControlNet conditioning scale", + interactive=True, + ) + control_image = gr.Image( + label="Control image", + type="pil", + ) + preprocessor_radio = gr.Radio( + [ + "Canny", + "Depth", + "LineArt", + "MLSD", + "NormalBAE", + "Pose", + "SoftEdge", + "Shuffle", + "None", + ], + label="Preprocessor", + info="Select the preprocessor for the control image", + value="Canny", + interactive=True, + ) + + enabled_checkbox.input( + fn=on_user_input, + inputs=[ + enabled_checkbox, + model_dropdown, + conditioning_scale_slider, + control_image, + preprocessor_radio, + ], + outputs=[enabled_checkbox], + ) + model_dropdown.input( + fn=on_user_input, + inputs=[ + enabled_checkbox, + model_dropdown, + conditioning_scale_slider, + control_image, + preprocessor_radio, + ], + outputs=[enabled_checkbox], + ) + conditioning_scale_slider.input( + fn=on_user_input, + inputs=[ + enabled_checkbox, + model_dropdown, + conditioning_scale_slider, + control_image, + preprocessor_radio, + ], + outputs=[enabled_checkbox], + ) + control_image.change( + fn=on_user_input, + inputs=[ + enabled_checkbox, + model_dropdown, + conditioning_scale_slider, + control_image, + preprocessor_radio, + ], + outputs=[enabled_checkbox], + ) + preprocessor_radio.change( + fn=on_user_input, + inputs=[ + enabled_checkbox, + model_dropdown, + conditioning_scale_slider, + control_image, + preprocessor_radio, + ], + outputs=[enabled_checkbox], + ) + conditioning_scale_slider.change( + on_change_conditioning_scale, conditioning_scale_slider + ) diff --git a/frontend/webui/css/style.css b/frontend/webui/css/style.css new file mode 100644 index 0000000000000000000000000000000000000000..216e9f54bd789851bb1aa091cc8beaf98ddbe75d --- /dev/null +++ 
b/frontend/webui/css/style.css @@ -0,0 +1,22 @@ +footer { + visibility: hidden +} + +#generate_button { + color: white; + border-color: #007bff; + background: #2563eb; + +} + +#save_button { + color: white; + border-color: #028b40; + background: #01b97c; + width: 200px; +} + +#settings_header { + background: rgb(245, 105, 105); + +} \ No newline at end of file diff --git a/frontend/webui/generation_settings_ui.py b/frontend/webui/generation_settings_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..455fbd3eb2b7ecf31c043cb8885b7dbcaf24b6fb --- /dev/null +++ b/frontend/webui/generation_settings_ui.py @@ -0,0 +1,157 @@ +import gradio as gr +from state import get_settings +from backend.models.gen_images import ImageFormat + +app_settings = get_settings() + + +def on_change_inference_steps(steps): + app_settings.settings.lcm_diffusion_setting.inference_steps = steps + + +def on_change_image_width(img_width): + app_settings.settings.lcm_diffusion_setting.image_width = img_width + + +def on_change_image_height(img_height): + app_settings.settings.lcm_diffusion_setting.image_height = img_height + + +def on_change_num_images(num_images): + app_settings.settings.lcm_diffusion_setting.number_of_images = num_images + + +def on_change_guidance_scale(guidance_scale): + app_settings.settings.lcm_diffusion_setting.guidance_scale = guidance_scale + + +def on_change_seed_value(seed): + app_settings.settings.lcm_diffusion_setting.seed = seed + + +def on_change_seed_checkbox(seed_checkbox): + app_settings.settings.lcm_diffusion_setting.use_seed = seed_checkbox + + +def on_change_safety_checker_checkbox(safety_checker_checkbox): + app_settings.settings.lcm_diffusion_setting.use_safety_checker = ( + safety_checker_checkbox + ) + + +def on_change_tiny_auto_encoder_checkbox(tiny_auto_encoder_checkbox): + app_settings.settings.lcm_diffusion_setting.use_tiny_auto_encoder = ( + tiny_auto_encoder_checkbox + ) + + +def on_offline_checkbox(offline_checkbox): + app_settings.settings.lcm_diffusion_setting.use_offline_model = offline_checkbox + + +def on_change_image_format(image_format): + if image_format == "PNG": + app_settings.settings.generated_images.format = ImageFormat.PNG.value.upper() + else: + app_settings.settings.generated_images.format = ImageFormat.JPEG.value.upper() + + app_settings.save() + + +def get_generation_settings_ui() -> None: + with gr.Blocks(): + with gr.Row(): + with gr.Column(): + num_inference_steps = gr.Slider( + 1, + 25, + value=app_settings.settings.lcm_diffusion_setting.inference_steps, + step=1, + label="Inference Steps", + interactive=True, + ) + + image_height = gr.Slider( + 256, + 1024, + value=app_settings.settings.lcm_diffusion_setting.image_height, + step=256, + label="Image Height", + interactive=True, + ) + image_width = gr.Slider( + 256, + 1024, + value=app_settings.settings.lcm_diffusion_setting.image_width, + step=256, + label="Image Width", + interactive=True, + ) + num_images = gr.Slider( + 1, + 50, + value=app_settings.settings.lcm_diffusion_setting.number_of_images, + step=1, + label="Number of images to generate", + interactive=True, + ) + guidance_scale = gr.Slider( + 1.0, + 2.0, + value=app_settings.settings.lcm_diffusion_setting.guidance_scale, + step=0.1, + label="Guidance Scale", + interactive=True, + ) + + seed = gr.Slider( + value=app_settings.settings.lcm_diffusion_setting.seed, + minimum=0, + maximum=999999999, + label="Seed", + step=1, + interactive=True, + ) + seed_checkbox = gr.Checkbox( + label="Use seed", + 
value=app_settings.settings.lcm_diffusion_setting.use_seed, + interactive=True, + ) + + safety_checker_checkbox = gr.Checkbox( + label="Use Safety Checker", + value=app_settings.settings.lcm_diffusion_setting.use_safety_checker, + interactive=True, + ) + tiny_auto_encoder_checkbox = gr.Checkbox( + label="Use tiny auto encoder for SD", + value=app_settings.settings.lcm_diffusion_setting.use_tiny_auto_encoder, + interactive=True, + ) + offline_checkbox = gr.Checkbox( + label="Use locally cached model or downloaded model folder(offline)", + value=app_settings.settings.lcm_diffusion_setting.use_offline_model, + interactive=True, + ) + img_format = gr.Radio( + label="Output image format", + choices=["PNG", "JPEG"], + value=app_settings.settings.generated_images.format, + interactive=True, + ) + + num_inference_steps.change(on_change_inference_steps, num_inference_steps) + image_height.change(on_change_image_height, image_height) + image_width.change(on_change_image_width, image_width) + num_images.change(on_change_num_images, num_images) + guidance_scale.change(on_change_guidance_scale, guidance_scale) + seed.change(on_change_seed_value, seed) + seed_checkbox.change(on_change_seed_checkbox, seed_checkbox) + safety_checker_checkbox.change( + on_change_safety_checker_checkbox, safety_checker_checkbox + ) + tiny_auto_encoder_checkbox.change( + on_change_tiny_auto_encoder_checkbox, tiny_auto_encoder_checkbox + ) + offline_checkbox.change(on_offline_checkbox, offline_checkbox) + img_format.change(on_change_image_format, img_format) diff --git a/frontend/webui/image_to_image_ui.py b/frontend/webui/image_to_image_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..2d1ca9a7f7e055da17247b349154bca56d2d7c8d --- /dev/null +++ b/frontend/webui/image_to_image_ui.py @@ -0,0 +1,120 @@ +from typing import Any +import gradio as gr +from backend.models.lcmdiffusion_setting import DiffusionTask +from models.interface_types import InterfaceType +from frontend.utils import is_reshape_required +from constants import DEVICE +from state import get_settings, get_context +from concurrent.futures import ThreadPoolExecutor + + +app_settings = get_settings() + +previous_width = 0 +previous_height = 0 +previous_model_id = "" +previous_num_of_images = 0 + + +def generate_image_to_image( + prompt, + negative_prompt, + init_image, + strength, +) -> Any: + context = get_context(InterfaceType.WEBUI) + global previous_height, previous_width, previous_model_id, previous_num_of_images, app_settings + + app_settings.settings.lcm_diffusion_setting.prompt = prompt + app_settings.settings.lcm_diffusion_setting.negative_prompt = negative_prompt + app_settings.settings.lcm_diffusion_setting.init_image = init_image + app_settings.settings.lcm_diffusion_setting.strength = strength + + app_settings.settings.lcm_diffusion_setting.diffusion_task = ( + DiffusionTask.image_to_image.value + ) + model_id = app_settings.settings.lcm_diffusion_setting.openvino_lcm_model_id + reshape = False + image_width = app_settings.settings.lcm_diffusion_setting.image_width + image_height = app_settings.settings.lcm_diffusion_setting.image_height + num_images = app_settings.settings.lcm_diffusion_setting.number_of_images + if app_settings.settings.lcm_diffusion_setting.use_openvino: + reshape = is_reshape_required( + previous_width, + image_width, + previous_height, + image_height, + previous_model_id, + model_id, + previous_num_of_images, + num_images, + ) + + with ThreadPoolExecutor(max_workers=1) as executor: + future = 
executor.submit( + context.generate_text_to_image, + app_settings.settings, + reshape, + DEVICE, + ) + images = future.result() + + previous_width = image_width + previous_height = image_height + previous_model_id = model_id + previous_num_of_images = num_images + return images + + +def get_image_to_image_ui() -> None: + with gr.Blocks(): + with gr.Row(): + with gr.Column(): + input_image = gr.Image(label="Init image", type="pil") + with gr.Row(): + prompt = gr.Textbox( + show_label=False, + lines=3, + placeholder="A fantasy landscape", + container=False, + ) + + generate_btn = gr.Button( + "Generate", + elem_id="generate_button", + scale=0, + ) + negative_prompt = gr.Textbox( + label="Negative prompt (Works in LCM-LoRA mode, set guidance > 1.0):", + lines=1, + placeholder="", + ) + strength = gr.Slider( + 0.1, + 1, + value=app_settings.settings.lcm_diffusion_setting.strength, + step=0.01, + label="Strength", + ) + + input_params = [ + prompt, + negative_prompt, + input_image, + strength, + ] + + with gr.Column(): + output = gr.Gallery( + label="Generated images", + show_label=True, + elem_id="gallery", + columns=2, + height=512, + ) + + generate_btn.click( + fn=generate_image_to_image, + inputs=input_params, + outputs=output, + ) diff --git a/frontend/webui/image_variations_ui.py b/frontend/webui/image_variations_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..215785601ffd12f3c7fd0ad3a3255503adf88c57 --- /dev/null +++ b/frontend/webui/image_variations_ui.py @@ -0,0 +1,106 @@ +from typing import Any +import gradio as gr +from backend.models.lcmdiffusion_setting import DiffusionTask +from context import Context +from models.interface_types import InterfaceType +from frontend.utils import is_reshape_required +from constants import DEVICE +from state import get_settings, get_context +from concurrent.futures import ThreadPoolExecutor + +app_settings = get_settings() + + +previous_width = 0 +previous_height = 0 +previous_model_id = "" +previous_num_of_images = 0 + + +def generate_image_variations( + init_image, + variation_strength, +) -> Any: + context = get_context(InterfaceType.WEBUI) + global previous_height, previous_width, previous_model_id, previous_num_of_images, app_settings + + app_settings.settings.lcm_diffusion_setting.init_image = init_image + app_settings.settings.lcm_diffusion_setting.strength = variation_strength + app_settings.settings.lcm_diffusion_setting.prompt = "" + app_settings.settings.lcm_diffusion_setting.negative_prompt = "" + + app_settings.settings.lcm_diffusion_setting.diffusion_task = ( + DiffusionTask.image_to_image.value + ) + model_id = app_settings.settings.lcm_diffusion_setting.openvino_lcm_model_id + reshape = False + image_width = app_settings.settings.lcm_diffusion_setting.image_width + image_height = app_settings.settings.lcm_diffusion_setting.image_height + num_images = app_settings.settings.lcm_diffusion_setting.number_of_images + if app_settings.settings.lcm_diffusion_setting.use_openvino: + reshape = is_reshape_required( + previous_width, + image_width, + previous_height, + image_height, + previous_model_id, + model_id, + previous_num_of_images, + num_images, + ) + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit( + context.generate_text_to_image, + app_settings.settings, + reshape, + DEVICE, + ) + images = future.result() + + previous_width = image_width + previous_height = image_height + previous_model_id = model_id + previous_num_of_images = num_images + return images + + +def 
get_image_variations_ui() -> None: + with gr.Blocks(): + with gr.Row(): + with gr.Column(): + input_image = gr.Image(label="Init image", type="pil") + with gr.Row(): + generate_btn = gr.Button( + "Generate", + elem_id="generate_button", + scale=0, + ) + + variation_strength = gr.Slider( + 0.1, + 1, + value=0.4, + step=0.01, + label="Variations Strength", + ) + + input_params = [ + input_image, + variation_strength, + ] + + with gr.Column(): + output = gr.Gallery( + label="Generated images", + show_label=True, + elem_id="gallery", + columns=2, + height=512, + ) + + generate_btn.click( + fn=generate_image_variations, + inputs=input_params, + outputs=output, + ) diff --git a/frontend/webui/lora_models_ui.py b/frontend/webui/lora_models_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..7c4b626d5aecdb23b5ce73e5584555f4f498abf2 --- /dev/null +++ b/frontend/webui/lora_models_ui.py @@ -0,0 +1,185 @@ +import gradio as gr +from os import path +from backend.lora import ( + get_lora_models, + get_active_lora_weights, + update_lora_weights, + load_lora_weight, +) +from state import get_settings, get_context +from frontend.utils import get_valid_lora_model +from models.interface_types import InterfaceType +from backend.models.lcmdiffusion_setting import LCMDiffusionSetting + + +_MAX_LORA_WEIGHTS = 5 + +_custom_lora_sliders = [] +_custom_lora_names = [] +_custom_lora_columns = [] + +app_settings = get_settings() + + +def on_click_update_weight(*lora_weights): + update_weights = [] + active_weights = get_active_lora_weights() + if not len(active_weights): + gr.Warning("No active LoRAs, first you need to load LoRA model") + return + for idx, lora in enumerate(active_weights): + update_weights.append( + ( + lora[0], + lora_weights[idx], + ) + ) + if len(update_weights) > 0: + update_lora_weights( + get_context(InterfaceType.WEBUI).lcm_text_to_image.pipeline, + app_settings.settings.lcm_diffusion_setting, + update_weights, + ) + + +def on_click_load_lora(lora_name, lora_weight): + if app_settings.settings.lcm_diffusion_setting.use_openvino: + gr.Warning("Currently LoRA is not supported in OpenVINO.") + return + lora_models_map = get_lora_models( + app_settings.settings.lcm_diffusion_setting.lora.models_dir + ) + + # Load a new LoRA + settings = app_settings.settings.lcm_diffusion_setting + settings.lora.fuse = False + settings.lora.enabled = False + settings.lora.path = lora_models_map[lora_name] + settings.lora.weight = lora_weight + if not path.exists(settings.lora.path): + gr.Warning("Invalid LoRA model path!") + return + pipeline = get_context(InterfaceType.WEBUI).lcm_text_to_image.pipeline + if not pipeline: + gr.Warning("Pipeline not initialized. 
Please generate an image first.") + return + settings.lora.enabled = True + load_lora_weight( + get_context(InterfaceType.WEBUI).lcm_text_to_image.pipeline, + settings, + ) + + # Update Gradio LoRA UI + global _MAX_LORA_WEIGHTS + values = [] + labels = [] + rows = [] + active_weights = get_active_lora_weights() + for idx, lora in enumerate(active_weights): + labels.append(f"{lora[0]}: ") + values.append(lora[1]) + rows.append(gr.Row.update(visible=True)) + for i in range(len(active_weights), _MAX_LORA_WEIGHTS): + labels.append(f"Update weight") + values.append(0.0) + rows.append(gr.Row.update(visible=False)) + return labels + values + rows + + +def get_lora_models_ui() -> None: + with gr.Blocks() as ui: + gr.HTML( + "Download and place your LoRA model weights in lora_models folders and restart App" + ) + with gr.Row(): + + with gr.Column(): + with gr.Row(): + lora_models_map = get_lora_models( + app_settings.settings.lcm_diffusion_setting.lora.models_dir + ) + valid_model = get_valid_lora_model( + list(lora_models_map.values()), + app_settings.settings.lcm_diffusion_setting.lora.path, + app_settings.settings.lcm_diffusion_setting.lora.models_dir, + ) + if valid_model != "": + valid_model_path = lora_models_map[valid_model] + app_settings.settings.lcm_diffusion_setting.lora.path = ( + valid_model_path + ) + else: + app_settings.settings.lcm_diffusion_setting.lora.path = "" + + lora_model = gr.Dropdown( + lora_models_map.keys(), + label="LoRA model", + info="LoRA model weight to load (You can use Lora models from Civitai or Hugging Face .safetensors format)", + value=valid_model, + interactive=True, + ) + + lora_weight = gr.Slider( + 0.0, + 1.0, + value=app_settings.settings.lcm_diffusion_setting.lora.weight, + step=0.05, + label="Initial Lora weight", + interactive=True, + ) + load_lora_btn = gr.Button( + "Load selected LoRA", + elem_id="load_lora_button", + scale=0, + ) + + with gr.Row(): + gr.Markdown( + "## Loaded LoRA models", + show_label=False, + ) + update_lora_weights_btn = gr.Button( + "Update LoRA weights", + elem_id="load_lora_button", + scale=0, + ) + + global _MAX_LORA_WEIGHTS + global _custom_lora_sliders + global _custom_lora_names + global _custom_lora_columns + for i in range(0, _MAX_LORA_WEIGHTS): + new_row = gr.Column(visible=False) + _custom_lora_columns.append(new_row) + with new_row: + lora_name = gr.Markdown( + "Lora Name", + show_label=True, + ) + lora_slider = gr.Slider( + 0.0, + 1.0, + step=0.05, + label="LoRA weight", + interactive=True, + visible=True, + ) + + _custom_lora_names.append(lora_name) + _custom_lora_sliders.append(lora_slider) + + load_lora_btn.click( + fn=on_click_load_lora, + inputs=[lora_model, lora_weight], + outputs=[ + *_custom_lora_names, + *_custom_lora_sliders, + *_custom_lora_columns, + ], + ) + + update_lora_weights_btn.click( + fn=on_click_update_weight, + inputs=[*_custom_lora_sliders], + outputs=None, + ) diff --git a/frontend/webui/models_ui.py b/frontend/webui/models_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..f5e8e6ca69a67422dd74773934dbd01532627d50 --- /dev/null +++ b/frontend/webui/models_ui.py @@ -0,0 +1,85 @@ +from app_settings import AppSettings +from typing import Any +import gradio as gr +from constants import LCM_DEFAULT_MODEL, LCM_DEFAULT_MODEL_OPENVINO +from state import get_settings +from frontend.utils import get_valid_model_id + +app_settings = get_settings() +app_settings.settings.lcm_diffusion_setting.openvino_lcm_model_id = get_valid_model_id( + app_settings.openvino_lcm_models, + 
app_settings.settings.lcm_diffusion_setting.openvino_lcm_model_id, +) + + +def change_lcm_model_id(model_id): + app_settings.settings.lcm_diffusion_setting.lcm_model_id = model_id + + +def change_lcm_lora_model_id(model_id): + app_settings.settings.lcm_diffusion_setting.lcm_lora.lcm_lora_id = model_id + + +def change_lcm_lora_base_model_id(model_id): + app_settings.settings.lcm_diffusion_setting.lcm_lora.base_model_id = model_id + + +def change_openvino_lcm_model_id(model_id): + app_settings.settings.lcm_diffusion_setting.openvino_lcm_model_id = model_id + + +def get_models_ui() -> None: + with gr.Blocks(): + with gr.Row(): + lcm_model_id = gr.Dropdown( + app_settings.lcm_models, + label="LCM model", + info="Diffusers LCM model ID", + value=get_valid_model_id( + app_settings.lcm_models, + app_settings.settings.lcm_diffusion_setting.lcm_model_id, + LCM_DEFAULT_MODEL, + ), + interactive=True, + ) + with gr.Row(): + lcm_lora_model_id = gr.Dropdown( + app_settings.lcm_lora_models, + label="LCM LoRA model", + info="Diffusers LCM LoRA model ID", + value=get_valid_model_id( + app_settings.lcm_lora_models, + app_settings.settings.lcm_diffusion_setting.lcm_lora.lcm_lora_id, + ), + interactive=True, + ) + lcm_lora_base_model_id = gr.Dropdown( + app_settings.stable_diffsuion_models, + label="LCM LoRA base model", + info="Diffusers LCM LoRA base model ID", + value=get_valid_model_id( + app_settings.stable_diffsuion_models, + app_settings.settings.lcm_diffusion_setting.lcm_lora.base_model_id, + ), + interactive=True, + ) + with gr.Row(): + lcm_openvino_model_id = gr.Dropdown( + app_settings.openvino_lcm_models, + label="LCM OpenVINO model", + info="OpenVINO LCM-LoRA fused model ID", + value=get_valid_model_id( + app_settings.openvino_lcm_models, + app_settings.settings.lcm_diffusion_setting.openvino_lcm_model_id, + ), + interactive=True, + ) + + lcm_model_id.change(change_lcm_model_id, lcm_model_id) + lcm_lora_model_id.change(change_lcm_lora_model_id, lcm_lora_model_id) + lcm_lora_base_model_id.change( + change_lcm_lora_base_model_id, lcm_lora_base_model_id + ) + lcm_openvino_model_id.change( + change_openvino_lcm_model_id, lcm_openvino_model_id + ) diff --git a/frontend/webui/realtime_ui.py b/frontend/webui/realtime_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..cdceee8a8436c59755447f0e7d7252ff1c53e48f --- /dev/null +++ b/frontend/webui/realtime_ui.py @@ -0,0 +1,146 @@ +import gradio as gr +from backend.lcm_text_to_image import LCMTextToImage +from backend.models.lcmdiffusion_setting import LCMLora, LCMDiffusionSetting +from constants import DEVICE, LCM_DEFAULT_MODEL_OPENVINO +from time import perf_counter +import numpy as np +from cv2 import imencode +import base64 +from backend.device import get_device_name +from constants import APP_VERSION +from backend.device import is_openvino_device + +lcm_text_to_image = LCMTextToImage() +lcm_lora = LCMLora( + base_model_id="Lykon/dreamshaper-8", + lcm_lora_id="latent-consistency/lcm-lora-sdv1-5", +) + + +# https://github.com/gradio-app/gradio/issues/2635#issuecomment-1423531319 +def encode_pil_to_base64_new(pil_image): + image_arr = np.asarray(pil_image)[:, :, ::-1] + _, byte_data = imencode(".png", image_arr) + base64_data = base64.b64encode(byte_data) + base64_string_opencv = base64_data.decode("utf-8") + return "data:image/png;base64," + base64_string_opencv + + +# monkey patching encode pil +gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new + + +def predict( + prompt, + steps, + seed, +): + 
lcm_diffusion_setting = LCMDiffusionSetting() + lcm_diffusion_setting.openvino_lcm_model_id = "rupeshs/sdxs-512-0.9-openvino" + lcm_diffusion_setting.prompt = prompt + lcm_diffusion_setting.guidance_scale = 1.0 + lcm_diffusion_setting.inference_steps = steps + lcm_diffusion_setting.seed = seed + lcm_diffusion_setting.use_seed = True + lcm_diffusion_setting.image_width = 512 + lcm_diffusion_setting.image_height = 512 + lcm_diffusion_setting.use_openvino = True if is_openvino_device() else False + lcm_diffusion_setting.use_tiny_auto_encoder = True + lcm_text_to_image.init( + DEVICE, + lcm_diffusion_setting, + ) + start = perf_counter() + + images = lcm_text_to_image.generate(lcm_diffusion_setting) + latency = perf_counter() - start + print(f"Latency: {latency:.2f} seconds") + return images[0] + + +css = """ +#container{ + margin: 0 auto; + max-width: 40rem; +} +#intro{ + max-width: 100%; + text-align: center; + margin: 0 auto; +} +#generate_button { + color: white; + border-color: #007bff; + background: #007bff; + width: 200px; + height: 50px; +} +footer { + visibility: hidden +} +""" + + +def _get_footer_message() -> str: + version = f"
{APP_VERSION} " + footer_msg = version + ( + ' © 2023 - 2024 ' + " Rupesh Sreeraman
" + ) + return footer_msg + + +with gr.Blocks(css=css) as demo: + with gr.Column(elem_id="container"): + use_openvino = "- OpenVINO" if is_openvino_device() else "" + gr.Markdown( + f"""# Realtime FastSD CPU {use_openvino} + **Device : {DEVICE} , {get_device_name()}** + """, + elem_id="intro", + ) + + with gr.Row(): + with gr.Row(): + prompt = gr.Textbox( + placeholder="Describe the image you'd like to see", + scale=5, + container=False, + ) + generate_btn = gr.Button( + "Generate", + scale=1, + elem_id="generate_button", + ) + + image = gr.Image(type="filepath") + + steps = gr.Slider( + label="Steps", + value=1, + minimum=1, + maximum=6, + step=1, + visible=False, + ) + seed = gr.Slider( + randomize=True, + minimum=0, + maximum=999999999, + label="Seed", + step=1, + ) + gr.HTML(_get_footer_message()) + + inputs = [prompt, steps, seed] + prompt.input(fn=predict, inputs=inputs, outputs=image, show_progress=False) + generate_btn.click( + fn=predict, inputs=inputs, outputs=image, show_progress=False + ) + steps.change(fn=predict, inputs=inputs, outputs=image, show_progress=False) + seed.change(fn=predict, inputs=inputs, outputs=image, show_progress=False) + + +def start_realtime_text_to_image(share=False): + demo.queue() + demo.launch(share=share) diff --git a/frontend/webui/text_to_image_ui.py b/frontend/webui/text_to_image_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..5ec517dcff55cc3f40088ea21788fbe113dda0b0 --- /dev/null +++ b/frontend/webui/text_to_image_ui.py @@ -0,0 +1,100 @@ +import gradio as gr +from typing import Any +from backend.models.lcmdiffusion_setting import DiffusionTask +from models.interface_types import InterfaceType +from constants import DEVICE +from state import get_settings, get_context +from frontend.utils import is_reshape_required +from concurrent.futures import ThreadPoolExecutor +from pprint import pprint + +app_settings = get_settings() + +previous_width = 0 +previous_height = 0 +previous_model_id = "" +previous_num_of_images = 0 + + +def generate_text_to_image( + prompt, + neg_prompt, +) -> Any: + context = get_context(InterfaceType.WEBUI) + global previous_height, previous_width, previous_model_id, previous_num_of_images, app_settings + app_settings.settings.lcm_diffusion_setting.prompt = prompt + app_settings.settings.lcm_diffusion_setting.negative_prompt = neg_prompt + app_settings.settings.lcm_diffusion_setting.diffusion_task = ( + DiffusionTask.text_to_image.value + ) + model_id = app_settings.settings.lcm_diffusion_setting.openvino_lcm_model_id + reshape = False + image_width = app_settings.settings.lcm_diffusion_setting.image_width + image_height = app_settings.settings.lcm_diffusion_setting.image_height + num_images = app_settings.settings.lcm_diffusion_setting.number_of_images + if app_settings.settings.lcm_diffusion_setting.use_openvino: + reshape = is_reshape_required( + previous_width, + image_width, + previous_height, + image_height, + previous_model_id, + model_id, + previous_num_of_images, + num_images, + ) + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit( + context.generate_text_to_image, + app_settings.settings, + reshape, + DEVICE, + ) + images = future.result() + + previous_width = image_width + previous_height = image_height + previous_model_id = model_id + previous_num_of_images = num_images + return images + + +def get_text_to_image_ui() -> None: + with gr.Blocks(): + with gr.Row(): + with gr.Column(): + with gr.Row(): + prompt = gr.Textbox( + show_label=False, + lines=3, + 
placeholder="A fantasy landscape", + container=False, + ) + + generate_btn = gr.Button( + "Generate", + elem_id="generate_button", + scale=0, + ) + negative_prompt = gr.Textbox( + label="Negative prompt (Works in LCM-LoRA mode, set guidance > 1.0) :", + lines=1, + placeholder="", + ) + + input_params = [prompt, negative_prompt] + + with gr.Column(): + output = gr.Gallery( + label="Generated images", + show_label=True, + elem_id="gallery", + columns=2, + height=512, + ) + generate_btn.click( + fn=generate_text_to_image, + inputs=input_params, + outputs=output, + ) diff --git a/frontend/webui/ui.py b/frontend/webui/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..fdbc133da4ed2c37187d5112082e062af75331dc --- /dev/null +++ b/frontend/webui/ui.py @@ -0,0 +1,90 @@ +import gradio as gr +from constants import APP_VERSION +from frontend.webui.text_to_image_ui import get_text_to_image_ui +from frontend.webui.image_to_image_ui import get_image_to_image_ui +from frontend.webui.generation_settings_ui import get_generation_settings_ui +from frontend.webui.models_ui import get_models_ui +from frontend.webui.image_variations_ui import get_image_variations_ui +from frontend.webui.upscaler_ui import get_upscaler_ui +from frontend.webui.lora_models_ui import get_lora_models_ui +from frontend.webui.controlnet_ui import get_controlnet_ui +from paths import FastStableDiffusionPaths +from state import get_settings + +app_settings = get_settings() + + +def _get_footer_message() -> str: + version = f"
{APP_VERSION} " + footer_msg = version + ( + ' © 2023 - 2024 ' + " Rupesh Sreeraman
" + ) + return footer_msg + + +def get_web_ui() -> gr.Blocks: + def change_mode(mode): + global app_settings + app_settings.settings.lcm_diffusion_setting.use_lcm_lora = False + app_settings.settings.lcm_diffusion_setting.use_openvino = False + if mode == "LCM-LoRA": + app_settings.settings.lcm_diffusion_setting.use_lcm_lora = True + elif mode == "LCM-OpenVINO": + app_settings.settings.lcm_diffusion_setting.use_openvino = True + + # Prevent saved LoRA and ControlNet settings from being used by + # default; in WebUI mode, the user must explicitly enable those + if app_settings.settings.lcm_diffusion_setting.lora: + app_settings.settings.lcm_diffusion_setting.lora.enabled = False + if app_settings.settings.lcm_diffusion_setting.controlnet: + app_settings.settings.lcm_diffusion_setting.controlnet.enabled = False + + with gr.Blocks( + css=FastStableDiffusionPaths.get_css_path(), + title="FastSD CPU", + ) as fastsd_web_ui: + gr.HTML("
FastSD CPU
") + current_mode = "LCM" + if app_settings.settings.lcm_diffusion_setting.use_openvino: + current_mode = "LCM-OpenVINO" + elif app_settings.settings.lcm_diffusion_setting.use_lcm_lora: + current_mode = "LCM-LoRA" + + mode = gr.Radio( + ["LCM", "LCM-LoRA", "LCM-OpenVINO"], + label="Mode", + info="Current working mode", + value=current_mode, + ) + mode.change(change_mode, inputs=mode) + + with gr.Tabs(): + with gr.TabItem("Text to Image"): + get_text_to_image_ui() + with gr.TabItem("Image to Image"): + get_image_to_image_ui() + with gr.TabItem("Image Variations"): + get_image_variations_ui() + with gr.TabItem("Upscaler"): + get_upscaler_ui() + with gr.TabItem("Generation Settings"): + get_generation_settings_ui() + with gr.TabItem("Models"): + get_models_ui() + with gr.TabItem("Lora Models"): + get_lora_models_ui() + with gr.TabItem("ControlNet"): + get_controlnet_ui() + + gr.HTML(_get_footer_message()) + + return fastsd_web_ui + + +def start_webui( + share: bool = False, +): + webui = get_web_ui() + webui.queue() + webui.launch(share=share) diff --git a/frontend/webui/upscaler_ui.py b/frontend/webui/upscaler_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..41d7cde0ce72b1069d4376755dbd5937200d4f08 --- /dev/null +++ b/frontend/webui/upscaler_ui.py @@ -0,0 +1,81 @@ +from typing import Any +import gradio as gr +from models.interface_types import InterfaceType +from state import get_settings, get_context +from backend.upscale.upscaler import upscale_image +from backend.models.upscale import UpscaleMode +from paths import FastStableDiffusionPaths, join_paths +from time import time + +app_settings = get_settings() + + +previous_width = 0 +previous_height = 0 +previous_model_id = "" +previous_num_of_images = 0 + + +def create_upscaled_image( + source_image, + upscale_mode, +) -> Any: + context = get_context(InterfaceType.WEBUI) + scale_factor = 2 + if upscale_mode == "SD": + mode = UpscaleMode.sd_upscale.value + else: + mode = UpscaleMode.normal.value + + upscaled_filepath = FastStableDiffusionPaths.get_upscale_filepath( + None, + scale_factor, + app_settings.settings.generated_images.format, + ) + image = upscale_image( + context=context, + src_image_path=source_image, + dst_image_path=upscaled_filepath, + upscale_mode=mode, + ) + return image + + +def get_upscaler_ui() -> None: + with gr.Blocks(): + with gr.Row(): + with gr.Column(): + input_image = gr.Image(label="Image", type="filepath") + with gr.Row(): + upscale_mode = gr.Radio( + ["EDSR", "SD"], + label="Upscale Mode (2x)", + info="Select upscale method, SD Upscale is experimental", + value="EDSR", + ) + + generate_btn = gr.Button( + "Upscale", + elem_id="generate_button", + scale=0, + ) + + input_params = [ + input_image, + upscale_mode, + ] + + with gr.Column(): + output = gr.Gallery( + label="Generated images", + show_label=True, + elem_id="gallery", + columns=2, + height=512, + ) + + generate_btn.click( + fn=create_upscaled_image, + inputs=input_params, + outputs=output, + ) diff --git a/image_ops.py b/image_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..b60e911d37616bf29592b15fca9901f404a6e397 --- /dev/null +++ b/image_ops.py @@ -0,0 +1,15 @@ +from PIL import Image + + +def resize_pil_image( + pil_image: Image, + image_width, + image_height, +): + return pil_image.convert("RGB").resize( + ( + image_width, + image_height, + ), + Image.Resampling.LANCZOS, + ) diff --git a/models/interface_types.py b/models/interface_types.py new file mode 100644 index 
0000000000000000000000000000000000000000..fcab144facabbf7342331129410183ea0b6075ee --- /dev/null +++ b/models/interface_types.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class InterfaceType(Enum): + WEBUI = "Web User Interface" + GUI = "Graphical User Interface" + CLI = "Command Line Interface" diff --git a/models/settings.py b/models/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..afa1650fb5dab9dcef5645ac4aaf184939e8ae49 --- /dev/null +++ b/models/settings.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel +from backend.models.lcmdiffusion_setting import LCMDiffusionSetting, LCMLora +from backend.models.gen_images import GeneratedImages + + +class Settings(BaseModel): + lcm_diffusion_setting: LCMDiffusionSetting = LCMDiffusionSetting(lcm_lora=LCMLora()) + generated_images: GeneratedImages = GeneratedImages() diff --git a/paths.py b/paths.py new file mode 100644 index 0000000000000000000000000000000000000000..2de41b2ef496b584965d15d6311cfba3519022ee --- /dev/null +++ b/paths.py @@ -0,0 +1,98 @@ +import os +import constants +from pathlib import Path +from time import time +from utils import get_image_file_extension + + +def join_paths( + first_path: str, + second_path: str, +) -> str: + return os.path.join(first_path, second_path) + + +def get_file_name(file_path: str) -> str: + return Path(file_path).stem + + +def get_app_path() -> str: + app_dir = os.path.dirname(__file__) + work_dir = os.path.dirname(app_dir) + return work_dir + + +def get_configs_path() -> str: + config_path = join_paths(get_app_path(), constants.CONFIG_DIRECTORY) + return config_path + + +class FastStableDiffusionPaths: + @staticmethod + def get_app_settings_path() -> str: + configs_path = get_configs_path() + settings_path = join_paths( + configs_path, + constants.APP_SETTINGS_FILE, + ) + return settings_path + + @staticmethod + def get_results_path() -> str: + results_path = join_paths(get_app_path(), constants.RESULTS_DIRECTORY) + return results_path + + @staticmethod + def get_css_path() -> str: + app_dir = os.path.dirname(__file__) + css_path = os.path.join( + app_dir, + "frontend", + "webui", + "css", + "style.css", + ) + return css_path + + @staticmethod + def get_models_config_path(model_config_file: str) -> str: + configs_path = get_configs_path() + models_path = join_paths( + configs_path, + model_config_file, + ) + return models_path + + @staticmethod + def get_upscale_filepath( + file_path_src: str, + scale_factor: int, + format: str, + ) -> str: + if file_path_src: + file_name_src = get_file_name(file_path_src) + else: + file_name_src = "fastsdcpu" + + extension = get_image_file_extension(format) + upscaled_filepath = join_paths( + FastStableDiffusionPaths.get_results_path(), + f"{file_name_src}_{int(scale_factor)}x_upscale_{int(time())}{extension}", + ) + return upscaled_filepath + + @staticmethod + def get_lora_models_path() -> str: + lora_models_path = join_paths(get_app_path(), constants.LORA_DIRECTORY) + return lora_models_path + + @staticmethod + def get_controlnet_models_path() -> str: + controlnet_models_path = join_paths( + get_app_path(), constants.CONTROLNET_DIRECTORY + ) + return controlnet_models_path + + +def get_base_folder_name(path: str) -> str: + return os.path.basename(path) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d768c4ced81b144f8d0ab8a7881de152d047bb9e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,32 @@ +accelerate==0.23.0 +diffusers==0.23.0 +transformers==4.35.0 +PyQt5 
+Pillow==9.4.0 +openvino==2023.2.0 +optimum==1.14.0 +optimum-intel==1.12.1 +onnx==1.15.0 +onnxruntime==1.16.1 +pydantic==2.4.2 +typing-extensions==4.8.0 +pyyaml==6.0.1 +gradio==3.39.0 +peft==0.6.1 +opencv-python==4.8.1.7 diff --git a/state.py b/state.py new file mode 100644 index 0000000000000000000000000000000000000000..6c0c4d5599c192840646f8d655cc16e4fbb8c6b2 --- /dev/null +++ b/state.py @@ -0,0 +1,32 @@ +from app_settings import AppSettings +from typing import Optional + +from context import Context +from models.interface_types import InterfaceType + + +class _AppState: + _instance: Optional["_AppState"] = None + settings: Optional[AppSettings] = None + context: Optional[Context] = None + + +def get_state() -> _AppState: + if _AppState._instance is None: + _AppState._instance = _AppState() + return _AppState._instance + + +def get_settings(skip_file: bool = False) -> AppSettings: + state = get_state() + if state.settings is None: + state.settings = AppSettings() + state.settings.load(skip_file) + return state.settings + + +def get_context(interface_type: InterfaceType) -> Context: + state = get_state() + if state.context is None: + state.context = Context(interface_type) + return state.context diff --git a/utils.py b/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c9a8d03f178c7d479ac8dab895b1bd2f6be47757 --- /dev/null +++ b/utils.py @@ -0,0 +1,28 @@ +import platform +from typing import List + + +def show_system_info(): + try: + print(f"Running on {platform.system()} platform") + print(f"OS: {platform.platform()}") + print(f"Processor: {platform.processor()}") + except Exception as ex: + print(f"Error occurred while getting system information: {ex}") + + +def get_models_from_text_file(file_path: str) -> List: + models = [] + with open(file_path, "r") as file: + lines = file.readlines() + for repo_id in lines: + if repo_id.strip() != "": + models.append(repo_id.strip()) + return models + + +def get_image_file_extension(image_format: str) -> str: + if image_format == "JPEG": + return ".jpg" + elif image_format == "PNG": + return ".png"
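
A minimal sketch of how the state helpers and the generation call used throughout this patch fit together, assuming the modules and call signatures shown above (get_settings, get_context, Context.generate_text_to_image) and a populated config directory; the prompt string is only an example value, not part of the patch:

# Illustrative only: relies on the repo modules introduced in this patch being importable
# and on config/ containing at least one valid model entry.
from concurrent.futures import ThreadPoolExecutor

from backend.models.lcmdiffusion_setting import DiffusionTask
from constants import DEVICE
from models.interface_types import InterfaceType
from state import get_context, get_settings

app_settings = get_settings()              # lazily creates the AppSettings singleton
context = get_context(InterfaceType.CLI)   # lazily creates the shared Context

setting = app_settings.settings.lcm_diffusion_setting
setting.prompt = "a cozy cabin in the woods"   # example prompt
setting.diffusion_task = DiffusionTask.text_to_image.value

# The web UI callbacks submit generation to a single-worker executor so the
# Gradio event loop is not blocked; the same call works from a plain script.
with ThreadPoolExecutor(max_workers=1) as executor:
    images = executor.submit(
        context.generate_text_to_image,
        app_settings.settings,
        False,    # reshape flag, only meaningful for OpenVINO models
        DEVICE,
    ).result()

print(f"Generated {len(images)} image(s)")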