TheNetherWatcher's picture
Upload folder using huggingface_hub
d0ffe9c verified
import glob
import logging
import os
import shutil
from pathlib import Path
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from tqdm.rich import tqdm
logger = logging.getLogger(__name__)
#https://github.com/jinwonkim93/laplacian-pyramid-blend
#https://blog.shikoan.com/pytorch-laplacian-pyramid/
class LaplacianPyramidBlender:
    """Blend a source image into a target image with Laplacian pyramids.

    Both images are decomposed into Laplacian pyramids and the mask into a
    Gaussian pyramid.  Each level is mixed as
    ``target * (1 - mask) + source * mask`` and the mixed pyramid is
    collapsed back into a single image.  The three coarsest levels always
    take the *target* coefficients, so only the finer levels of the source
    contribute to the result.

    All tensor work runs under ``torch.no_grad()`` on ``self.device``.
    """

    # Torch device used for every tensor; overwritten on each ``__call__``.
    # ``None`` lets torch pick its default device.
    device = None

    def get_gaussian_kernel(self):
        """Return the 5x5 binomial smoothing kernel.

        Returns:
            A float32 tensor of shape ``(1, 1, 5, 5)`` on ``self.device``
            whose entries sum to 1.
        """
        kernel = np.array([
            [1, 4, 6, 4, 1],
            [4, 16, 24, 16, 4],
            [6, 24, 36, 24, 6],
            [4, 16, 24, 16, 4],
            [1, 4, 6, 4, 1]], np.float32) / 256.0
        return torch.as_tensor(kernel.reshape(1, 1, 5, 5), device=self.device)

    def _smooth_per_channel(self, image, stride=1):
        """Convolve every channel of ``image`` with the smoothing kernel.

        Channels are filtered independently (one single-channel convolution
        each) and concatenated again, so any channel count is accepted.
        """
        kernel = self.get_gaussian_kernel()
        filtered = [
            F.conv2d(image[:, c:c + 1, :, :], kernel, padding=2, stride=stride)
            for c in range(image.shape[1])
        ]
        return torch.cat(filtered, dim=1)

    def pyramid_down(self, image):
        """Smooth ``image`` (N, C, H, W) and halve its spatial resolution."""
        with torch.no_grad():
            return self._smooth_per_channel(image, stride=2)

    def pyramid_up(self, image, size=None):
        """Upsample ``image`` (N, C, H, W) and smooth the result.

        Args:
            image: Tensor to enlarge.
            size: Target ``(H, W)``.  When ``None`` both spatial dimensions
                are doubled instead.

        Returns:
            The upsampled tensor (``F.interpolate`` default mode) after
            smoothing.
        """
        with torch.no_grad():
            if size is None:
                upsampled = F.interpolate(image, scale_factor=2)
            else:
                upsampled = F.interpolate(image, size=size)
            return self._smooth_per_channel(upsampled)

    def gaussian_pyramid(self, original, n_pyramids):
        """Return ``[original, down(original), ...]`` with ``n_pyramids + 1`` entries."""
        pyramids = [original]
        current = original
        for _ in range(n_pyramids):
            current = self.pyramid_down(current)
            pyramids.append(current)
        return pyramids

    def laplacian_pyramid(self, original, n_pyramids):
        """Return the Laplacian pyramid of ``original``, finest level first.

        Each of the first ``n_pyramids`` entries is a Gaussian level minus the
        upsampled next-coarser level; the last entry is the coarsest Gaussian
        level itself, which is what makes the pyramid invertible.
        """
        pyramids = self.gaussian_pyramid(original, n_pyramids)
        laplacian = [
            fine - self.pyramid_up(coarse, fine.shape[2:])
            for fine, coarse in zip(pyramids[:-1], pyramids[1:])
        ]
        laplacian.append(pyramids[-1])
        return laplacian

    def laplacian_pyramid_blending_with_mask(self, src, target, mask, num_levels=9):
        """Blend ``src`` into ``target`` level by level.

        Args:
            src: Channel-first ``(C, H, W)`` array of the image to blend in.
            target: Channel-first ``(C, H, W)`` array of the base image.
            mask: Channel-first array with values in ``[0, 1]``; 1 selects
                ``src``.  It needs either the same channel count as the
                images or a single channel (broadcast over the channels).
            num_levels: Number of down-sampling steps of the pyramids.

        Returns:
            The blended image as a float32 NumPy array of shape ``(C, H, W)``.
        """

        def to_batch(array):
            # Force float32 so integer or float64 inputs match the kernel
            # dtype expected by ``conv2d``.
            return torch.as_tensor(
                np.expand_dims(array, axis=0),
                dtype=torch.float32,
                device=self.device,
            )

        # Coarsest level first, so reconstruction can walk the lists in order.
        lap_src = self.laplacian_pyramid(to_batch(src), num_levels)[::-1]
        lap_target = self.laplacian_pyramid(to_batch(target), num_levels)[::-1]
        mask_levels = self.gaussian_pyramid(to_batch(mask), num_levels)[::-1]

        blended = []
        for idx, (la, lb, level_mask) in enumerate(zip(lap_src, lap_target, mask_levels)):
            # The three coarsest levels keep the target coefficients even
            # inside the mask; finer levels take the source there.
            inside = lb if idx <= 2 else la
            blended.append(lb * (1.0 - level_mask) + inside * level_mask)

        # Collapse the pyramid from coarse to fine.
        result = blended[0]
        for lap in blended[1:]:
            result = self.pyramid_up(result, lap.shape[2:]) + lap
        return result.squeeze(dim=0).to('cpu').detach().numpy().copy()

    def __call__(self,
                 src_image: np.ndarray,
                 target_image: np.ndarray,
                 mask_image: np.ndarray,
                 device
                 ):
        """Blend ``src_image`` into ``target_image`` where ``mask_image`` is set.

        Args:
            src_image: ``(H, W, C)`` image; values are divided by 255, so a
                0..255 range is assumed.
            target_image: ``(H, W, C)`` image in the same value range.
            mask_image: ``(H, W, C)`` or ``(H, W, 1)`` mask; values are
                clipped to ``[0, 1]``.
            device: Torch device used for the computation; stored on the
                instance.

        Returns:
            The blended image as a uint8 array of shape ``(H, W, C)``.
        """
        self.device = device
        # Pyramid depth is derived from the image height only.
        num_levels = int(np.log2(src_image.shape[0]))

        # Channel-first layout; images are normalised to [0, 1].
        mask_chw = np.clip(mask_image, 0, 1).transpose([2, 0, 1])
        src_chw = src_image.transpose([2, 0, 1]).astype(np.float32) / 255.0
        target_chw = target_image.transpose([2, 0, 1]).astype(np.float32) / 255.0

        composite_image = self.laplacian_pyramid_blending_with_mask(
            src_chw, target_chw, mask_chw, num_levels)
        composite_image = np.clip(composite_image * 255, 0, 255).astype(np.uint8)
        return composite_image.transpose([1, 2, 0])
def composite(bg_dir, fg_list, output_dir, masked_area_list, device="cuda"):
    """Composite foreground frames onto background PNGs with pyramid blending.

    Background files matching ``[0-9]*.png`` in ``bg_dir`` are processed in
    sorted order, paired positionally with ``fg_list`` and
    ``masked_area_list``.  Each result is written to ``output_dir`` under the
    background's file name.  When the foreground or the mask of a frame is
    ``None`` the background file is copied unchanged.

    Args:
        bg_dir: Directory containing the background PNG frames.
        fg_list: Per-frame foreground arrays (or ``None``).
        output_dir: Destination directory, as ``str`` or ``Path``.
        masked_area_list: Per-frame masks (or ``None``).  They are
            concatenated three times along axis 2 and divided by 255, so
            ``(H, W, 1)`` arrays in a 0..255 range are presumably expected
            -- confirm against the caller.
        device: Torch device handed to ``LaplacianPyramidBlender``.
    """
    # Accept plain strings as well as Path objects for the destination.
    output_dir = Path(output_dir)
    bg_list = sorted(glob.glob(os.path.join(bg_dir, "[0-9]*.png"), recursive=False))

    blender = LaplacianPyramidBlender()

    for bg_path, fg_array, mask in tqdm(zip(bg_list, fg_list, masked_area_list), total=len(bg_list), desc="compositing"):
        save_path = output_dir / Path(bg_path).name

        if fg_array is None:
            logger.info("composite fg_array is None -> skip")
            shutil.copy(bg_path, save_path)
            continue

        if mask is None:
            logger.info("mask is None -> skip")
            shutil.copy(bg_path, save_path)
            continue

        # Force three channels so palette, grayscale or RGBA backgrounds work
        # with the three-channel mask; a no-op for RGB files.
        with Image.open(bg_path) as bg_file:
            bg = np.array(bg_file.convert("RGB"))

        mask = np.concatenate([mask, mask, mask], 2)

        # Bring foreground and mask to the background resolution.
        h, w, _ = bg.shape
        fg = cv2.resize(fg_array, dsize=(w, h))
        mask = cv2.resize(mask, dsize=(w, h))

        # Feather the mask edge, then scale it to [0, 1].
        mask = mask.astype(np.float32)
        mask = cv2.GaussianBlur(mask, (15, 15), 0)
        mask = mask / 255

        # Plain alpha composite first; the pyramid blend then merges this
        # pre-composited image into the background.
        fg = fg * mask + bg * (1 - mask)

        img = blender(fg, bg, mask, device)

        Image.fromarray(img).save(save_path)
def simple_composite(bg_dir, fg_list, output_dir, masked_area_list, device="cuda"):
    """Composite foreground frames onto background PNGs with a feathered mask.

    Background files matching ``[0-9]*.png`` in ``bg_dir`` are processed in
    sorted order, paired positionally with ``fg_list`` and
    ``masked_area_list``.  Each frame is mixed as
    ``fg * mask + bg * (1 - mask)`` and written to ``output_dir`` under the
    background's file name.  When the foreground or the mask of a frame is
    ``None`` the background file is copied unchanged.

    Args:
        bg_dir: Directory containing the background PNG frames.
        fg_list: Per-frame foreground arrays (or ``None``).
        output_dir: Destination directory, as ``str`` or ``Path``.
        masked_area_list: Per-frame masks (or ``None``).  They are
            concatenated three times along axis 2 and divided by 255, so
            ``(H, W, 1)`` arrays in a 0..255 range are presumably expected
            -- confirm against the caller.
        device: Unused here; kept so the signature matches ``composite``.
    """
    # Accept plain strings as well as Path objects for the destination.
    output_dir = Path(output_dir)
    bg_list = sorted(glob.glob(os.path.join(bg_dir, "[0-9]*.png"), recursive=False))

    for bg_path, fg_array, mask in tqdm(zip(bg_list, fg_list, masked_area_list), total=len(bg_list), desc="compositing"):
        save_path = output_dir / Path(bg_path).name

        if fg_array is None:
            logger.info("composite fg_array is None -> skip")
            shutil.copy(bg_path, save_path)
            continue

        if mask is None:
            logger.info("mask is None -> skip")
            shutil.copy(bg_path, save_path)
            continue

        # Force three channels so palette, grayscale or RGBA backgrounds work
        # with the three-channel mask; a no-op for RGB files.
        with Image.open(bg_path) as bg_file:
            bg = np.array(bg_file.convert("RGB"))

        mask = np.concatenate([mask, mask, mask], 2)

        # Bring foreground and mask to the background resolution.
        h, w, _ = bg.shape
        fg = cv2.resize(fg_array, dsize=(w, h))
        mask = cv2.resize(mask, dsize=(w, h))

        # Feather the mask edge, then scale it to [0, 1].
        mask = mask.astype(np.float32)
        mask = cv2.GaussianBlur(mask, (15, 15), 0)
        mask = mask / 255

        img = fg * mask + bg * (1 - mask)
        img = img.clip(0, 255).astype(np.uint8)

        Image.fromarray(img).save(save_path)