from diffusers import AutoPipelineForImage2Image
from diffusers import DDPMScheduler
from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img import retrieve_timesteps, retrieve_latents
from diffusers.schedulers.scheduling_ddim import DDIMSchedulerOutput
import torch
from PIL import Image
num_steps_inversion = 5
strength = 0.8
generator = None
device = "cuda" if torch.cuda.is_available() else "cpu"
image_path = "edit_dataset/01.jpg"
src_prompt = "butterfly perched on purple flower"
tgt_prompt = "dragonfly perched on purple flower"
ws1 = [1.5, 1.5, 1.5, 1.5]  # per-step weights for the edit direction
ws2 = [1, 1, 1, 1]  # per-step weights for the reconstruction correction
def encode_image(image, pipe):
    image = pipe.image_processor.preprocess(image)
    image = image.to(device=device, dtype=pipe.dtype)

    # the SDXL VAE is upcast to fp32 for numerical stability during encoding
    if pipe.vae.config.force_upcast:
        image = image.float()
        pipe.vae.to(dtype=torch.float32)

    if isinstance(generator, list):
        init_latents = [
            retrieve_latents(pipe.vae.encode(image[i : i + 1]), generator=generator[i])
            for i in range(1)
        ]
        init_latents = torch.cat(init_latents, dim=0)
    else:
        init_latents = retrieve_latents(pipe.vae.encode(image), generator=generator)

    if pipe.vae.config.force_upcast:
        pipe.vae.to(pipe.dtype)
        init_latents = init_latents.to(pipe.dtype)

    init_latents = pipe.vae.config.scaling_factor * init_latents
    return init_latents.to(dtype=torch.float16)
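
# A hypothetical counterpart to encode_image, included only as a sketch of how
# latents produced above could be mapped back to a PIL image for inspection.
# It relies on the standard diffusers VAE / image-processor API and is not part
# of the editing pipeline itself.
def decode_latents_to_image(latents, pipe):
    latents = latents.to(dtype=pipe.vae.dtype) / pipe.vae.config.scaling_factor
    with torch.no_grad():
        decoded = pipe.vae.decode(latents).sample
    return pipe.image_processor.postprocess(decoded, output_type="pil")[0]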
# def create_xts(scheduler, timesteps, x_0, noise_shift_delta=1, generator=None):
#     noising_delta = noise_shift_delta * (timesteps[0] - timesteps[1])
#     noise_timesteps = [timestep - int(noising_delta) for timestep in timesteps]
#     noise_timesteps = noise_timesteps[:3]
#     x_0_expanded = x_0.expand(len(noise_timesteps), -1, -1, -1)
#     noise = torch.randn(x_0_expanded.size(), generator=generator, device="cpu", dtype=x_0.dtype).to(x_0.device)
#     x_ts = scheduler.add_noise(x_0_expanded, noise, torch.IntTensor(noise_timesteps))
#     x_ts = [t.unsqueeze(dim=0) for t in list(x_ts)]
#     x_ts += [x_0]
#     return x_ts
def deterministic_ddpm_step(
    model_output: torch.FloatTensor,
    timestep,
    sample: torch.FloatTensor,
    eta,
    use_clipped_model_output,
    generator,
    variance_noise,
    return_dict,
    scheduler,
):
""" | |
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion | |
process from the learned model outputs (most often the predicted noise). | |
Args: | |
model_output (`torch.FloatTensor`): | |
The direct output from learned diffusion model. | |
timestep (`float`): | |
The current discrete timestep in the diffusion chain. | |
sample (`torch.FloatTensor`): | |
A current instance of a sample created by the diffusion process. | |
generator (`torch.Generator`, *optional*): | |
A random number generator. | |
return_dict (`bool`, *optional*, defaults to `True`): | |
Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`. | |
Returns: | |
[`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`: | |
If return_dict is `True`, [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] is returned, otherwise a | |
tuple is returned where the first element is the sample tensor. | |
""" | |
    t = timestep
    prev_t = scheduler.previous_timestep(t)

    if model_output.shape[1] == sample.shape[1] * 2 and scheduler.variance_type in [
        "learned",
        "learned_range",
    ]:
        model_output, predicted_variance = torch.split(
            model_output, sample.shape[1], dim=1
        )
    else:
        predicted_variance = None

    # 1. compute alphas, betas
    alpha_prod_t = scheduler.alphas_cumprod[t]
    alpha_prod_t_prev = (
        scheduler.alphas_cumprod[prev_t] if prev_t >= 0 else scheduler.one
    )
    beta_prod_t = 1 - alpha_prod_t
    beta_prod_t_prev = 1 - alpha_prod_t_prev
    current_alpha_t = alpha_prod_t / alpha_prod_t_prev
    current_beta_t = 1 - current_alpha_t

    # 2. compute predicted original sample from predicted noise also called
    # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf
    if scheduler.config.prediction_type == "epsilon":
        pred_original_sample = (
            sample - beta_prod_t ** (0.5) * model_output
        ) / alpha_prod_t ** (0.5)
    elif scheduler.config.prediction_type == "sample":
        pred_original_sample = model_output
    elif scheduler.config.prediction_type == "v_prediction":
        pred_original_sample = (alpha_prod_t**0.5) * sample - (
            beta_prod_t**0.5
        ) * model_output
    else:
        raise ValueError(
            f"prediction_type given as {scheduler.config.prediction_type} must be one of `epsilon`, `sample` or"
            " `v_prediction` for the DDPMScheduler."
        )

    # 3. Clip or threshold "predicted x_0"
    if scheduler.config.thresholding:
        pred_original_sample = scheduler._threshold_sample(pred_original_sample)
    elif scheduler.config.clip_sample:
        pred_original_sample = pred_original_sample.clamp(
            -scheduler.config.clip_sample_range, scheduler.config.clip_sample_range
        )

    # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
    # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
    pred_original_sample_coeff = (
        alpha_prod_t_prev ** (0.5) * current_beta_t
    ) / beta_prod_t
    current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t

    # 5. Compute predicted previous sample µ_t
    # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
    pred_prev_sample = (
        pred_original_sample_coeff * pred_original_sample
        + current_sample_coeff * sample
    )

    return pred_prev_sample
def normalize(
    z_t,
    i,
    max_norm_zs,
):
    max_norm = max_norm_zs[i]
    if max_norm < 0:
        return z_t, 1
    norm = torch.norm(z_t)
    if norm < max_norm:
        return z_t, 1
    coeff = max_norm / norm
    z_t = z_t * coeff
    return z_t, coeff
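
# Minimal illustration of normalize on hypothetical tensors: only the last
# index carries a finite threshold here, mirroring the [-1, -1, -1, 15.5]
# list used below, so earlier steps pass through unchanged. Defined for
# reference only; it is never called by the editing pipeline.
def _normalize_example():
    z = torch.randn(1, 4, 64, 64) * 10  # norm is far above 15.5
    z_clipped, coeff = normalize(z, 3, [-1, -1, -1, 15.5])
    assert torch.norm(z_clipped) <= 15.5 + 1e-3 and coeff < 1
    z_same, coeff_one = normalize(z, 0, [-1, -1, -1, 15.5])
    assert coeff_one == 1 and torch.equal(z_same, z)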
def step_save_latents(
    self,
    model_output: torch.FloatTensor,
    timestep: int,
    sample: torch.FloatTensor,
    eta: float = 0.0,
    use_clipped_model_output: bool = False,
    generator=None,
    variance_noise=None,
    return_dict: bool = True,
):
    timestep_index = self._inner_index
    next_timestep_index = timestep_index + 1
    u_hat_t = deterministic_ddpm_step(
        model_output=model_output,
        timestep=timestep,
        sample=sample,
        eta=eta,
        use_clipped_model_output=use_clipped_model_output,
        generator=generator,
        variance_noise=variance_noise,
        return_dict=False,
        scheduler=self,
    )
    # x_ts[0] is x_T, so the target for this step is the next entry
    x_t_minus_1 = self.x_ts[next_timestep_index]
    self.x_ts_c_hat.append(u_hat_t)

    z_t = x_t_minus_1 - u_hat_t
    self.latents.append(z_t)

    z_t, _ = normalize(z_t, timestep_index, [-1, -1, -1, 15.5])
    x_t_minus_1_predicted = u_hat_t + z_t

    if not return_dict:
        return (x_t_minus_1_predicted,)

    return DDIMSchedulerOutput(prev_sample=x_t_minus_1_predicted, pred_original_sample=None)
def step_use_latents(
    self,
    model_output: torch.FloatTensor,
    timestep: int,
    sample: torch.FloatTensor,
    eta: float = 0.0,
    use_clipped_model_output: bool = False,
    generator=None,
    variance_noise=None,
    return_dict: bool = True,
):
    print(f"_inner_index: {self._inner_index}")
    timestep_index = self._inner_index
    next_timestep_index = timestep_index + 1
    z_t = self.latents[next_timestep_index]  # + 1 because latents[0] is x_T
    _, normalize_coefficient = normalize(
        z_t,
        timestep_index,
        [-1, -1, -1, 15.5],
    )
    if normalize_coefficient == 0:
        eta = 0

    # eta = normalize_coefficient

    x_t_hat_c_hat = deterministic_ddpm_step(
        model_output=model_output,
        timestep=timestep,
        sample=sample,
        eta=eta,
        use_clipped_model_output=use_clipped_model_output,
        generator=generator,
        variance_noise=variance_noise,
        return_dict=False,
        scheduler=self,
    )

    w1 = ws1[timestep_index]
    w2 = ws2[timestep_index]

    x_t_minus_1_exact = self.x_ts[next_timestep_index]
    x_t_minus_1_exact = x_t_minus_1_exact.expand_as(x_t_hat_c_hat)

    x_t_c_hat: torch.Tensor = self.x_ts_c_hat[next_timestep_index]
    x_t_c = x_t_c_hat[0].expand_as(x_t_hat_c_hat)

    # batch layout: the first half holds the source-prompt predictions, the
    # second half the edit-prompt predictions
    zero_index_reconstruction = 0
    edit_prompts_num = (model_output.size(0) - zero_index_reconstruction) // 2
    x_t_hat_c_indices = (zero_index_reconstruction, edit_prompts_num + zero_index_reconstruction)
    edit_images_indices = (
        edit_prompts_num + zero_index_reconstruction,
        model_output.size(0),
    )
    x_t_hat_c = torch.zeros_like(x_t_hat_c_hat)
    x_t_hat_c[edit_images_indices[0] : edit_images_indices[1]] = x_t_hat_c_hat[
        x_t_hat_c_indices[0] : x_t_hat_c_indices[1]
    ]
    v1 = x_t_hat_c_hat - x_t_hat_c
    v2 = x_t_hat_c - normalize_coefficient * x_t_c

    x_t_minus_1 = normalize_coefficient * x_t_minus_1_exact + w1 * v1 + w2 * v2

    x_t_minus_1[x_t_hat_c_indices[0] : x_t_hat_c_indices[1]] = x_t_minus_1[
        edit_images_indices[0] : edit_images_indices[1]
    ]  # update x_t_hat_c to be x_t_hat_c_hat

    if not return_dict:
        return (x_t_minus_1,)

    return DDIMSchedulerOutput(
        prev_sample=x_t_minus_1,
        pred_original_sample=None,
    )
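
# For the edited images, the update above can be read as
#   x_{t-1} = c * x_{t-1}^exact + w1 * (x_hat_edit - x_hat_src) + w2 * (x_hat_src - c * x_hat_inv),
# where c is the norm-clipping coefficient from normalize, x_{t-1}^exact is the
# precomputed noisy latent, x_hat_edit / x_hat_src are the deterministic DDPM
# predictions under the edit and source prompts, and x_hat_inv is the prediction
# saved during inversion. w1 scales the edit direction and w2 the reconstruction
# correction; the symbolic names here are only a reading aid, not code.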
class myDDPMScheduler(DDPMScheduler):
    def set_timesteps(self, *args, **kwargs):
        # reset the per-run counter used to index the saved latents;
        # DDPMScheduler itself does not define _inner_index
        self._inner_index = 0
        return super().set_timesteps(*args, **kwargs)

    def step(
        self,
        model_output: torch.FloatTensor,
        timestep: int,
        sample: torch.FloatTensor,
        eta: float = 0.0,
        use_clipped_model_output: bool = False,
        generator=None,
        variance_noise=None,
        return_dict: bool = True,
    ):
        print(f"timestep: {timestep}")

        # batch index 0 is the inversion/reconstruction branch, the rest are the edit branches
        res_inv = step_save_latents(
            self,
            model_output[:1, :, :, :],
            timestep,
            sample[:1, :, :, :],
            eta,
            use_clipped_model_output,
            generator,
            variance_noise,
            return_dict,
        )

        res_inf = step_use_latents(
            self,
            model_output[1:, :, :, :],
            timestep,
            sample[1:, :, :, :],
            eta,
            use_clipped_model_output,
            generator,
            variance_noise,
            return_dict,
        )

        self._inner_index += 1

        res = (torch.cat((res_inv[0], res_inf[0]), dim=0),)
        return res
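
# If one wanted to drive the edit with this scheduler subclass instead of the
# get_ddpm_inversion_scheduler wrapper used below, it could be attached to the
# pipeline in the usual diffusers way, e.g.:
#   pipeline.scheduler = myDDPMScheduler.from_config(pipeline.scheduler.config)
# This is only a sketch; the run() path below does not go through myDDPMScheduler.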
pipeline = AutoPipelineForImage2Image.from_pretrained(
    "stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", safety_checker=None
)
pipeline = pipeline.to(device)
pipeline.scheduler = DDPMScheduler.from_pretrained(  # type: ignore
    "stabilityai/sdxl-turbo",
    subfolder="scheduler",
    # cache_dir="/home/joberant/NLP_2223/giladd/test_dir/sdxl-turbo/models_cache",
)
# pipeline.scheduler = DDPMScheduler.from_config(pipeline.scheduler.config)

denoising_start = 0.2
timesteps, num_inference_steps = retrieve_timesteps(
    pipeline.scheduler, num_steps_inversion, device, None
)
timesteps, num_inference_steps = pipeline.get_timesteps(
    num_inference_steps=num_inference_steps,
    device=device,
    denoising_start=denoising_start,
    strength=0,
)
timesteps = timesteps.type(torch.int64)
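# With num_steps_inversion = 5 and denoising_start = 0.2, get_timesteps drops the
# timestep(s) at or above the 80% cutoff, leaving 4 denoising steps. This is
# presumably why ws1, ws2 and the max-norm list [-1, -1, -1, 15.5] all have
# length 4.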
from functools import partial

timesteps = [torch.tensor(t) for t in timesteps.tolist()]
pipeline.__call__ = partial(
    pipeline.__call__,
    num_inference_steps=num_steps_inversion,
    guidance_scale=0,
    generator=generator,
    denoising_start=denoising_start,
    strength=0,
)
# timesteps, num_inference_steps = retrieve_timesteps(pipeline.scheduler, num_steps_inversion, device, None)
# timesteps, num_inference_steps = pipeline.get_timesteps(num_inference_steps=num_inference_steps, device=device, strength=strength)
from utils import get_ddpm_inversion_scheduler, create_xts
from config import get_config, get_config_name
import argparse

# parser = argparse.ArgumentParser()
# parser.add_argument("--images_paths", type=str, default=None)
# parser.add_argument("--images_folder", type=str, default=None)
# parser.set_defaults(force_use_cpu=False)
# parser.add_argument("--force_use_cpu", action="store_true")
# parser.add_argument("--folder_name", type=str, default='test_measure_time')
# parser.add_argument("--config_from_file", type=str, default='run_configs/noise_shift_guidance_1_5.yaml')
# parser.set_defaults(save_intermediate_results=False)
# parser.add_argument("--save_intermediate_results", action="store_true")
# parser.add_argument("--batch_size", type=int, default=None)
# parser.set_defaults(skip_p_to_p=False)
# parser.add_argument("--skip_p_to_p", action="store_true", default=True)
# parser.set_defaults(only_p_to_p=False)
# parser.add_argument("--only_p_to_p", action="store_true")
# parser.set_defaults(fp16=False)
# parser.add_argument("--fp16", action="store_true", default=False)
# parser.add_argument("--prompts_file", type=str, default='dataset_measure_time/dataset.json')
# parser.add_argument("--images_in_prompts_file", type=str, default=None)
# parser.add_argument("--seed", type=int, default=2)
# parser.add_argument("--time_measure_n", type=int, default=1)
# args = parser.parse_args()
class Object(object):
    pass


args = Object()
args.images_paths = None
args.images_folder = None
args.force_use_cpu = False
args.folder_name = 'test_measure_time'
args.config_from_file = 'run_configs/noise_shift_guidance_1_5.yaml'
args.save_intermediate_results = False
args.batch_size = None
args.skip_p_to_p = True
args.only_p_to_p = False
args.fp16 = False
args.prompts_file = 'dataset_measure_time/dataset.json'
args.images_in_prompts_file = None
args.seed = 986
args.time_measure_n = 1

assert (
    args.batch_size is None or args.save_intermediate_results is False
), "save_intermediate_results is not implemented for batch_size > 1"

config = get_config(args)

# latent = latents[0].expand(3, -1, -1, -1)
# prompt = [src_prompt, src_prompt, tgt_prompt]
# image = pipeline.__call__(image=latent, prompt=prompt, eta=1).images
# for i, im in enumerate(image):
#     im.save(f"output_{i}.png")
def run(image_path, src_prompt, tgt_prompt, seed, w1, w2):
    generator = torch.Generator().manual_seed(seed)

    x_0_image = Image.open(image_path).convert("RGB").resize((512, 512), Image.LANCZOS)
    x_0 = encode_image(x_0_image, pipeline)

    # x_ts = create_xts(pipeline.scheduler, timesteps, x_0, noise_shift_delta=1, generator=generator)
    x_ts = create_xts(1, None, 0, generator, pipeline.scheduler, timesteps, x_0, no_add_noise=False)
    x_ts = [xt.to(dtype=torch.float16) for xt in x_ts]
    latents = [x_ts[0]]
    x_ts_c_hat = [None]
    config.ws1 = [w1] * 4
    config.ws2 = [w2] * 4
    pipeline.scheduler = get_ddpm_inversion_scheduler(
        pipeline.scheduler,
        config.step_function,
        config,
        timesteps,
        config.save_timesteps,
        latents,
        x_ts,
        x_ts_c_hat,
        args.save_intermediate_results,
        pipeline,
        x_0,
        v1s_images := [],
        v2s_images := [],
        deltas_images := [],
        v1_x0s := [],
        v2_x0s := [],
        deltas_x0s := [],
        "res12",
        image_name="im_name",
        time_measure_n=args.time_measure_n,
    )
    # batch of 3: [reconstruction, source prompt, target prompt]
    latent = latents[0].expand(3, -1, -1, -1)
    prompt = [src_prompt, src_prompt, tgt_prompt]
    image = pipeline.__call__(image=latent, prompt=prompt, eta=1).images
    return image[2]
if __name__ == "__main__":
    res = run(image_path, src_prompt, tgt_prompt, args.seed, 1.5, 1.0)
    res.save("output.png")