TheNetherWatcher's picture
Upload folder using huggingface_hub
d0ffe9c verified
import glob
import json
import logging
import os.path
import shutil
from datetime import datetime
from pathlib import Path
from typing import Annotated, Optional
import torch
import typer
from PIL import Image
from tqdm.rich import tqdm
from animatediff import __version__, get_dir
from animatediff.settings import ModelConfig, get_model_config
from animatediff.utils.tagger import get_labels
from animatediff.utils.util import (extract_frames, get_resized_image,
path_from_cwd, prepare_anime_seg,
prepare_groundingDINO, prepare_propainter,
prepare_sam_hq, prepare_softsplat)
logger = logging.getLogger(__name__)
stylize: typer.Typer = typer.Typer(
name="stylize",
context_settings=dict(help_option_names=["-h", "--help"]),
rich_markup_mode="rich",
pretty_exceptions_show_locals=False,
help="stylize video",
)
data_dir = get_dir("data")
controlnet_dirs = [
"controlnet_canny",
"controlnet_depth",
"controlnet_inpaint",
"controlnet_ip2p",
"controlnet_lineart",
"controlnet_lineart_anime",
"controlnet_mlsd",
"controlnet_normalbae",
"controlnet_openpose",
"controlnet_scribble",
"controlnet_seg",
"controlnet_shuffle",
"controlnet_softedge",
"controlnet_tile",
"qr_code_monster_v1",
"qr_code_monster_v2",
"controlnet_mediapipe_face",
"animatediff_controlnet",
]
def create_controlnet_dir(controlnet_root):
for c in controlnet_dirs:
c_dir = controlnet_root.joinpath(c)
c_dir.mkdir(parents=True, exist_ok=True)
@stylize.command(no_args_is_help=True)
def create_config(
org_movie: Annotated[
Path,
typer.Argument(path_type=Path, file_okay=True, dir_okay=False, exists=True, help="Path to movie file"),
] = ...,
config_org: Annotated[
Path,
typer.Option(
"--config-org",
"-c",
path_type=Path,
dir_okay=False,
exists=True,
help="Path to original config file",
),
] = Path("config/prompts/prompt_travel.json"),
ignore_list: Annotated[
Path,
typer.Option(
"--ignore-list",
"-g",
path_type=Path,
dir_okay=False,
exists=True,
help="path to ignore token list file",
),
] = Path("config/prompts/ignore_tokens.txt"),
out_dir: Annotated[
Optional[Path],
typer.Option(
"--out-dir",
"-o",
path_type=Path,
file_okay=False,
help="output directory",
),
] = Path("stylize/"),
fps: Annotated[
int,
typer.Option(
"--fps",
"-f",
min=1,
max=120,
help="fps",
),
] = 8,
duration: Annotated[
int,
typer.Option(
"--duration",
"-d",
min=-1,
max=3600,
help="Video duration in seconds. -1 means that the duration of the input video is used as is",
),
] = -1,
offset: Annotated[
int,
typer.Option(
"--offset",
"-of",
min=0,
max=3600,
help="offset in seconds. '-d 30 -of 1200' means to use 1200-1230 seconds of the input video",
),
] = 0,
aspect_ratio: Annotated[
float,
typer.Option(
"--aspect-ratio",
"-a",
min=-1,
max=5.0,
help="aspect ratio (width / height). (ex. 512 / 512 = 1.0 , 512 / 768 = 0.6666 , 768 / 512 = 1.5) -1 means that the aspect ratio of the input video is used as is.",
),
] = -1,
size_of_short_edge: Annotated[
int,
typer.Option(
"--short-edge",
"-sh",
min=100,
max=1024,
help="size of short edge",
),
] = 512,
predicte_interval: Annotated[
int,
typer.Option(
"--predicte-interval",
"-p",
min=1,
max=120,
help="Interval of frames to be predicted",
),
] = 1,
general_threshold: Annotated[
float,
typer.Option(
"--threshold",
"-th",
min=0.0,
max=1.0,
help="threshold for general token confidence",
),
] = 0.35,
character_threshold: Annotated[
float,
typer.Option(
"--threshold2",
"-th2",
min=0.0,
max=1.0,
help="threshold for character token confidence",
),
] = 0.85,
without_confidence: Annotated[
bool,
typer.Option(
"--no-confidence-format",
"-ncf",
is_flag=True,
help="confidence token format or not. ex. '(close-up:0.57), (monochrome:1.1)' -> 'close-up, monochrome'",
),
] = False,
is_no_danbooru_format: Annotated[
bool,
typer.Option(
"--no-danbooru-format",
"-ndf",
is_flag=True,
help="danbooru token format or not. ex. 'bandaid_on_leg, short_hair' -> 'bandaid on leg, short hair'",
),
] = False,
is_img2img: Annotated[
bool,
typer.Option(
"--img2img",
"-i2i",
is_flag=True,
help="img2img or not(txt2img).",
),
] = False,
low_vram: Annotated[
bool,
typer.Option(
"--low_vram",
"-lo",
is_flag=True,
help="low vram mode",
),
] = False,
gradual_latent_hires_fix: Annotated[
bool,
typer.Option(
"--gradual_latent_hires_fix",
"-gh",
is_flag=True,
help="gradual latent hires fix",
),
] = False,
):
"""Create a config file for video stylization"""
is_danbooru_format = not is_no_danbooru_format
with_confidence = not without_confidence
logger.info(f"{org_movie=}")
logger.info(f"{config_org=}")
logger.info(f"{ignore_list=}")
logger.info(f"{out_dir=}")
logger.info(f"{fps=}")
logger.info(f"{duration=}")
logger.info(f"{offset=}")
logger.info(f"{aspect_ratio=}")
logger.info(f"{size_of_short_edge=}")
logger.info(f"{predicte_interval=}")
logger.info(f"{general_threshold=}")
logger.info(f"{character_threshold=}")
logger.info(f"{with_confidence=}")
logger.info(f"{is_danbooru_format=}")
logger.info(f"{is_img2img=}")
logger.info(f"{low_vram=}")
logger.info(f"{gradual_latent_hires_fix=}")
model_config: ModelConfig = get_model_config(config_org)
# get a timestamp for the output directory
time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
# make the output directory
save_dir = out_dir.joinpath(f"{time_str}-{model_config.save_name}")
save_dir.mkdir(parents=True, exist_ok=True)
logger.info(f"Will save outputs to ./{path_from_cwd(save_dir)}")
img2img_dir = save_dir.joinpath("00_img2img")
img2img_dir.mkdir(parents=True, exist_ok=True)
extract_frames(org_movie, fps, img2img_dir, aspect_ratio, duration, offset, size_of_short_edge, low_vram)
controlnet_img_dir = save_dir.joinpath("00_controlnet_image")
create_controlnet_dir(controlnet_img_dir)
shutil.copytree(img2img_dir, controlnet_img_dir.joinpath("controlnet_openpose"), dirs_exist_ok=True)
#shutil.copytree(img2img_dir, controlnet_img_dir.joinpath("controlnet_ip2p"), dirs_exist_ok=True)
black_list = []
if ignore_list.is_file():
with open(ignore_list) as f:
black_list = [s.strip() for s in f.readlines()]
model_config.prompt_map = get_labels(
frame_dir=img2img_dir,
interval=predicte_interval,
general_threshold=general_threshold,
character_threshold=character_threshold,
ignore_tokens=black_list,
with_confidence=with_confidence,
is_danbooru_format=is_danbooru_format,
is_cpu = False,
)
model_config.head_prompt = ""
model_config.tail_prompt = ""
model_config.controlnet_map["input_image_dir"] = os.path.relpath(controlnet_img_dir.absolute(), data_dir)
model_config.controlnet_map["is_loop"] = False
model_config.lora_map={}
model_config.motion_lora_map={}
model_config.controlnet_map["max_samples_on_vram"] = 0
model_config.controlnet_map["max_models_on_vram"] = 0
model_config.controlnet_map["controlnet_openpose"] = {
"enable": True,
"use_preprocessor":True,
"guess_mode":False,
"controlnet_conditioning_scale": 1.0,
"control_guidance_start": 0.0,
"control_guidance_end": 1.0,
"control_scale_list":[],
"control_region_list":[]
}
model_config.controlnet_map["controlnet_ip2p"] = {
"enable": True,
"use_preprocessor":True,
"guess_mode":False,
"controlnet_conditioning_scale": 0.5,
"control_guidance_start": 0.0,
"control_guidance_end": 1.0,
"control_scale_list":[],
"control_region_list":[]
}
for m in model_config.controlnet_map:
if isinstance(model_config.controlnet_map[m] ,dict):
if "control_scale_list" in model_config.controlnet_map[m]:
model_config.controlnet_map[m]["control_scale_list"]=[]
ip_adapter_dir = save_dir.joinpath("00_ipadapter")
ip_adapter_dir.mkdir(parents=True, exist_ok=True)
model_config.ip_adapter_map = {
"enable": True,
"input_image_dir": os.path.relpath(ip_adapter_dir.absolute(), data_dir),
"prompt_fixed_ratio": 0.5,
"save_input_image": True,
"resized_to_square": False,
"scale": 0.5,
"is_full_face": False,
"is_plus_face": False,
"is_plus": True,
"is_light": False
}
model_config.img2img_map = {
"enable": is_img2img,
"init_img_dir" : os.path.relpath(img2img_dir.absolute(), data_dir),
"save_init_image": True,
"denoising_strength" : 0.7
}
model_config.region_map = {
}
model_config.gradual_latent_hires_fix_map = {
"enable" : True,
"scale" : {
"0": 0.5,
"0.7": 1.0
},
"reverse_steps": 5,
"noise_add_count": 3
}
model_config.output = {
"format" : "mp4",
"fps" : fps,
"encode_param":{
"crf": 10
}
}
img = Image.open( img2img_dir.joinpath("00000000.png") )
W, H = img.size
base_size = 768 if gradual_latent_hires_fix else 512
if W < H:
width = base_size
height = int(base_size * H/W)
else:
width = int(base_size * W/H)
height = base_size
width = int(width//8*8)
height = int(height//8*8)
length = len(glob.glob( os.path.join(img2img_dir, "[0-9]*.png"), recursive=False))
model_config.stylize_config={
"original_video":{
"path":org_movie,
"aspect_ratio":aspect_ratio,
"offset":offset,
},
"create_mask": [
"person"
],
"composite": {
"fg_list": [
{
"path" : " absolute path to frame dir ",
"mask_path" : " absolute path to mask dir (this is optional) ",
"mask_prompt" : "person"
},
{
"path" : " absolute path to frame dir ",
"mask_path" : " absolute path to mask dir (this is optional) ",
"mask_prompt" : "cat"
},
],
"bg_frame_dir": "Absolute path to the BG frame directory",
"hint": ""
},
"0":{
"width": width,
"height": height,
"length": length,
"context": 16,
"overlap": 16//4,
"stride": 0,
},
"1":{
"steps": model_config.steps,
"guidance_scale": model_config.guidance_scale,
"width": int(width * 1.5 //8*8),
"height": int(height * 1.5 //8*8),
"length": length,
"context": 8,
"overlap": 8//4,
"stride": 0,
"controlnet_tile":{
"enable": True,
"use_preprocessor":True,
"guess_mode":False,
"controlnet_conditioning_scale": 1.0,
"control_guidance_start": 0.0,
"control_guidance_end": 1.0,
"control_scale_list":[]
},
"controlnet_ip2p": {
"enable": False,
"use_preprocessor":True,
"guess_mode":False,
"controlnet_conditioning_scale": 0.5,
"control_guidance_start": 0.0,
"control_guidance_end": 1.0,
"control_scale_list":[]
},
"ip_adapter": False,
"reference": False,
"img2img": False,
"interpolation_multiplier": 1
}
}
if gradual_latent_hires_fix:
model_config.stylize_config.pop("1")
save_config_path = save_dir.joinpath("prompt.json")
save_config_path.write_text(model_config.json(indent=4), encoding="utf-8")
logger.info(f"config = { save_config_path }")
logger.info(f"stylize_dir = { save_dir }")
logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
logger.info(f"Hint. Edit the config file before starting the generation")
logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
logger.info(f"1. Change 'path' and 'motion_module' as needed")
logger.info(f"2. Enter the 'head_prompt' or 'tail_prompt' with your preferred prompt, quality prompt, lora trigger word, or any other prompt you wish to add.")
logger.info(f"3. Change 'n_prompt' as needed")
logger.info(f"4. Add the lora you need to 'lora_map'")
logger.info(f"5. If you do not like the default settings, edit 'ip_adapter_map' or 'controlnet_map'. \nIf you want to change the controlnet type, you need to replace the input image.")
logger.info(f"6. Change 'stylize_config' as needed. By default, it is generated twice: once for normal generation and once for upscaling.\nIf you don't need upscaling, delete the whole '1'.")
logger.info(f"7. Change 'output' as needed. Changing the 'fps' at this timing is not recommended as it will change the playback speed.\nIf you want to change the fps, specify it with the create-config option")
@stylize.command(no_args_is_help=True)
def generate(
stylize_dir: Annotated[
Path,
typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to stylize dir"),
] = ...,
length: Annotated[
int,
typer.Option(
"--length",
"-L",
min=-1,
max=9999,
help="Number of frames to generate. -1 means that the value in the config file is referenced.",
rich_help_panel="Generation",
),
] = -1,
frame_offset: Annotated[
int,
typer.Option(
"--frame-offset",
"-FO",
min=0,
max=999999,
help="Frame offset at generation.",
rich_help_panel="Generation",
),
] = 0,
):
"""Run video stylization"""
from animatediff.cli import generate
time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
config_org = stylize_dir.joinpath("prompt.json")
model_config: ModelConfig = get_model_config(config_org)
if length == -1:
length = model_config.stylize_config["0"]["length"]
model_config.stylize_config["0"]["length"] = min(model_config.stylize_config["0"]["length"] - frame_offset, length)
if "1" in model_config.stylize_config:
model_config.stylize_config["1"]["length"] = min(model_config.stylize_config["1"]["length"] - frame_offset, length)
if frame_offset > 0:
#controlnet
org_controlnet_img_dir = data_dir.joinpath( model_config.controlnet_map["input_image_dir"] )
new_controlnet_img_dir = org_controlnet_img_dir.parent / "00_tmp_controlnet_image"
if new_controlnet_img_dir.is_dir():
shutil.rmtree(new_controlnet_img_dir)
new_controlnet_img_dir.mkdir(parents=True, exist_ok=True)
for c in controlnet_dirs:
src_dir = org_controlnet_img_dir.joinpath(c)
dst_dir = new_controlnet_img_dir.joinpath(c)
if src_dir.is_dir():
dst_dir.mkdir(parents=True, exist_ok=True)
frame_length = model_config.stylize_config["0"]["length"]
src_imgs = sorted(glob.glob( os.path.join(src_dir, "[0-9]*.png"), recursive=False))
for img in src_imgs:
n = int(Path(img).stem)
if n in range(frame_offset, frame_offset + frame_length):
dst_img_path = dst_dir.joinpath( f"{n-frame_offset:08d}.png" )
shutil.copy(img, dst_img_path)
#img2img
org_img2img_img_dir = data_dir.joinpath( model_config.img2img_map["init_img_dir"] )
new_img2img_img_dir = org_img2img_img_dir.parent / "00_tmp_init_img_dir"
if new_img2img_img_dir.is_dir():
shutil.rmtree(new_img2img_img_dir)
new_img2img_img_dir.mkdir(parents=True, exist_ok=True)
src_dir = org_img2img_img_dir
dst_dir = new_img2img_img_dir
if src_dir.is_dir():
dst_dir.mkdir(parents=True, exist_ok=True)
frame_length = model_config.stylize_config["0"]["length"]
src_imgs = sorted(glob.glob( os.path.join(src_dir, "[0-9]*.png"), recursive=False))
for img in src_imgs:
n = int(Path(img).stem)
if n in range(frame_offset, frame_offset + frame_length):
dst_img_path = dst_dir.joinpath( f"{n-frame_offset:08d}.png" )
shutil.copy(img, dst_img_path)
new_prompt_map = {}
for p in model_config.prompt_map:
n = int(p)
if n in range(frame_offset, frame_offset + frame_length):
new_prompt_map[str(n-frame_offset)]=model_config.prompt_map[p]
model_config.prompt_map = new_prompt_map
model_config.controlnet_map["input_image_dir"] = os.path.relpath(new_controlnet_img_dir.absolute(), data_dir)
model_config.img2img_map["init_img_dir"] = os.path.relpath(new_img2img_img_dir.absolute(), data_dir)
tmp_config_path = stylize_dir.joinpath("prompt_tmp.json")
tmp_config_path.write_text(model_config.json(indent=4), encoding="utf-8")
config_org = tmp_config_path
output_0_dir = generate(
config_path=config_org,
width=model_config.stylize_config["0"]["width"],
height=model_config.stylize_config["0"]["height"],
length=model_config.stylize_config["0"]["length"],
context=model_config.stylize_config["0"]["context"],
overlap=model_config.stylize_config["0"]["overlap"],
stride=model_config.stylize_config["0"]["stride"],
out_dir=stylize_dir
)
torch.cuda.empty_cache()
output_0_dir = output_0_dir.rename(output_0_dir.parent / f"{time_str}_{0:02d}")
if "1" not in model_config.stylize_config:
logger.info(f"Stylized results are output to {output_0_dir}")
return
logger.info(f"Intermediate files have been output to {output_0_dir}")
output_0_img_dir = glob.glob( os.path.join(output_0_dir, "00-[0-9]*"), recursive=False)[0]
interpolation_multiplier = 1
if "interpolation_multiplier" in model_config.stylize_config["1"]:
interpolation_multiplier = model_config.stylize_config["1"]["interpolation_multiplier"]
if interpolation_multiplier > 1:
from animatediff.rife.rife import rife_interpolate
rife_img_dir = stylize_dir.joinpath(f"{1:02d}_rife_frame")
if rife_img_dir.is_dir():
shutil.rmtree(rife_img_dir)
rife_img_dir.mkdir(parents=True, exist_ok=True)
rife_interpolate(output_0_img_dir, rife_img_dir, interpolation_multiplier)
model_config.stylize_config["1"]["length"] *= interpolation_multiplier
if model_config.output:
model_config.output["fps"] *= interpolation_multiplier
if model_config.prompt_map:
model_config.prompt_map = { str(int(i)*interpolation_multiplier): model_config.prompt_map[i] for i in model_config.prompt_map }
output_0_img_dir = rife_img_dir
controlnet_img_dir = stylize_dir.joinpath("01_controlnet_image")
img2img_dir = stylize_dir.joinpath("01_img2img")
img2img_dir.mkdir(parents=True, exist_ok=True)
create_controlnet_dir(controlnet_img_dir)
ip2p_for_upscale = model_config.stylize_config["1"]["controlnet_ip2p"]["enable"]
ip_adapter_for_upscale = model_config.stylize_config["1"]["ip_adapter"]
ref_for_upscale = model_config.stylize_config["1"]["reference"]
shutil.copytree(output_0_img_dir, controlnet_img_dir.joinpath("controlnet_tile"), dirs_exist_ok=True)
if ip2p_for_upscale:
shutil.copytree(controlnet_img_dir.joinpath("controlnet_tile"), controlnet_img_dir.joinpath("controlnet_ip2p"), dirs_exist_ok=True)
shutil.copytree(controlnet_img_dir.joinpath("controlnet_tile"), img2img_dir, dirs_exist_ok=True)
model_config.controlnet_map["input_image_dir"] = os.path.relpath(controlnet_img_dir.absolute(), data_dir)
model_config.controlnet_map["controlnet_tile"] = model_config.stylize_config["1"]["controlnet_tile"]
model_config.controlnet_map["controlnet_ip2p"] = model_config.stylize_config["1"]["controlnet_ip2p"]
if "controlnet_ref" in model_config.controlnet_map:
model_config.controlnet_map["controlnet_ref"]["enable"] = ref_for_upscale
model_config.ip_adapter_map["enable"] = ip_adapter_for_upscale
for r in model_config.region_map:
reg = model_config.region_map[r]
if "condition" in reg:
if "ip_adapter_map" in reg["condition"]:
reg["condition"]["ip_adapter_map"]["enable"] = ip_adapter_for_upscale
model_config.steps = model_config.stylize_config["1"]["steps"] if "steps" in model_config.stylize_config["1"] else model_config.steps
model_config.guidance_scale = model_config.stylize_config["1"]["guidance_scale"] if "guidance_scale" in model_config.stylize_config["1"] else model_config.guidance_scale
model_config.img2img_map["enable"] = model_config.stylize_config["1"]["img2img"]
if model_config.img2img_map["enable"]:
model_config.img2img_map["init_img_dir"] = os.path.relpath(Path(output_0_img_dir).absolute(), data_dir)
save_config_path = stylize_dir.joinpath("prompt_01.json")
save_config_path.write_text(model_config.json(indent=4), encoding="utf-8")
output_1_dir = generate(
config_path=save_config_path,
width=model_config.stylize_config["1"]["width"],
height=model_config.stylize_config["1"]["height"],
length=model_config.stylize_config["1"]["length"],
context=model_config.stylize_config["1"]["context"],
overlap=model_config.stylize_config["1"]["overlap"],
stride=model_config.stylize_config["1"]["stride"],
out_dir=stylize_dir
)
output_1_dir = output_1_dir.rename(output_1_dir.parent / f"{time_str}_{1:02d}")
logger.info(f"Stylized results are output to {output_1_dir}")
@stylize.command(no_args_is_help=True)
def interpolate(
frame_dir: Annotated[
Path,
typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to frame dir"),
] = ...,
interpolation_multiplier: Annotated[
int,
typer.Option(
"--interpolation_multiplier",
"-m",
min=1,
max=10,
help="interpolation_multiplier",
),
] = 1,
):
"""Interpolation with original frames. This function does not work well if the shape of the subject is changed from the original video. Large movements can also ruin the picture.(Since this command is experimental, it is better to use other interpolation methods in most cases.)"""
try:
import cupy
except:
logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
logger.info(f"cupy is required to run interpolate")
logger.info(f"Your CUDA version is {torch.version.cuda}")
logger.info(f"Please find the installation method of cupy for your CUDA version from the following URL")
logger.info(f"https://docs.cupy.dev/en/latest/install.html#installing-cupy-from-pypi")
logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
return
prepare_softsplat()
time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
config_org = frame_dir.parent.joinpath("prompt.json")
model_config: ModelConfig = get_model_config(config_org)
if "original_video" in model_config.stylize_config:
org_video = Path(model_config.stylize_config["original_video"]["path"])
offset = model_config.stylize_config["original_video"]["offset"]
aspect_ratio = model_config.stylize_config["original_video"]["aspect_ratio"]
else:
logger.warn('!!! The following parameters are required !!!')
logger.warn('"stylize_config": {')
logger.warn(' "original_video": {')
logger.warn(' "path": "C:\\my_movie\\test.mp4",')
logger.warn(' "aspect_ratio": 0.6666,')
logger.warn(' "offset": 0')
logger.warn(' },')
raise ValueError('model_config.stylize_config["original_video"] not found')
save_dir = frame_dir.parent.joinpath(f"optflow_{time_str}")
org_frame_dir = save_dir.joinpath("org_frame")
org_frame_dir.mkdir(parents=True, exist_ok=True)
stylize_frame = sorted(glob.glob( os.path.join(frame_dir, "[0-9]*.png"), recursive=False))
stylize_frame_num = len(stylize_frame)
duration = int(stylize_frame_num / model_config.output["fps"]) + 1
extract_frames(org_video, model_config.output["fps"] * interpolation_multiplier, org_frame_dir,aspect_ratio,duration,offset)
W, H = Image.open(stylize_frame[0]).size
org_frame = sorted(glob.glob( os.path.join(org_frame_dir, "[0-9]*.png"), recursive=False))
for org in tqdm(org_frame):
img = get_resized_image(org, W, H)
img.save(org)
output_dir = save_dir.joinpath("warp_img")
output_dir.mkdir(parents=True, exist_ok=True)
from animatediff.softmax_splatting.run import estimate2
for sty1,sty2 in tqdm(zip(stylize_frame,stylize_frame[1:]), total=len(stylize_frame[1:])):
sty1 = Path(sty1)
sty2 = Path(sty2)
head = int(sty1.stem)
sty1_img = Image.open(sty1)
sty2_img = Image.open(sty2)
guide_frames=[org_frame_dir.joinpath(f"{g:08d}.png") for g in range(head*interpolation_multiplier, (head+1)*interpolation_multiplier)]
guide_frames=[Image.open(g) for g in guide_frames]
result = estimate2(sty1_img, sty2_img, guide_frames, "data/models/softsplat/softsplat-lf")
shutil.copy( frame_dir.joinpath(f"{head:08d}.png"), output_dir.joinpath(f"{head*interpolation_multiplier:08d}.png"))
offset = head*interpolation_multiplier + 1
for i, r in enumerate(result):
r.save( output_dir.joinpath(f"{offset+i:08d}.png") )
from animatediff.generate import save_output
frames = sorted(glob.glob( os.path.join(output_dir, "[0-9]*.png"), recursive=False))
out_images = []
for f in frames:
out_images.append(Image.open(f))
model_config.output["fps"] *= interpolation_multiplier
out_file = save_dir.joinpath(f"01_{model_config.output['fps']}fps")
save_output(out_images,output_dir,out_file,model_config.output,True,save_frames=None,save_video=None)
out_file = save_dir.joinpath(f"00_original")
save_output(out_images,org_frame_dir,out_file,model_config.output,True,save_frames=None,save_video=None)
@stylize.command(no_args_is_help=True)
def create_mask(
stylize_dir: Annotated[
Path,
typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to stylize dir"),
] = ...,
frame_dir: Annotated[
Path,
typer.Option(
"--frame_dir",
"-f",
path_type=Path,
file_okay=False,
help="Path to source frames directory. default is 'STYLIZE_DIR/00_img2img'",
),
] = None,
box_threshold: Annotated[
float,
typer.Option(
"--box_threshold",
"-b",
min=0.0,
max=1.0,
help="box_threshold",
rich_help_panel="create mask",
),
] = 0.3,
text_threshold: Annotated[
float,
typer.Option(
"--text_threshold",
"-t",
min=0.0,
max=1.0,
help="text_threshold",
rich_help_panel="create mask",
),
] = 0.25,
mask_padding: Annotated[
int,
typer.Option(
"--mask_padding",
"-mp",
min=-100,
max=100,
help="padding pixel value",
rich_help_panel="create mask",
),
] = 0,
no_gb: Annotated[
bool,
typer.Option(
"--no_gb",
"-ng",
is_flag=True,
help="no green back",
rich_help_panel="create mask",
),
] = False,
no_crop: Annotated[
bool,
typer.Option(
"--no_crop",
"-nc",
is_flag=True,
help="no crop",
rich_help_panel="create mask",
),
] = False,
use_rembg: Annotated[
bool,
typer.Option(
"--use_rembg",
"-rem",
is_flag=True,
help="use [rembg] instead of [Sam+GroundingDINO]",
rich_help_panel="create mask",
),
] = False,
use_animeseg: Annotated[
bool,
typer.Option(
"--use_animeseg",
"-anim",
is_flag=True,
help="use [anime-segmentation] instead of [Sam+GroundingDINO]",
rich_help_panel="create mask",
),
] = False,
low_vram: Annotated[
bool,
typer.Option(
"--low_vram",
"-lo",
is_flag=True,
help="low vram mode",
rich_help_panel="create mask/tag",
),
] = False,
ignore_list: Annotated[
Path,
typer.Option(
"--ignore-list",
"-g",
path_type=Path,
dir_okay=False,
exists=True,
help="path to ignore token list file",
rich_help_panel="create tag",
),
] = Path("config/prompts/ignore_tokens.txt"),
predicte_interval: Annotated[
int,
typer.Option(
"--predicte-interval",
"-p",
min=1,
max=120,
help="Interval of frames to be predicted",
rich_help_panel="create tag",
),
] = 1,
general_threshold: Annotated[
float,
typer.Option(
"--threshold",
"-th",
min=0.0,
max=1.0,
help="threshold for general token confidence",
rich_help_panel="create tag",
),
] = 0.35,
character_threshold: Annotated[
float,
typer.Option(
"--threshold2",
"-th2",
min=0.0,
max=1.0,
help="threshold for character token confidence",
rich_help_panel="create tag",
),
] = 0.85,
without_confidence: Annotated[
bool,
typer.Option(
"--no-confidence-format",
"-ncf",
is_flag=True,
help="confidence token format or not. ex. '(close-up:0.57), (monochrome:1.1)' -> 'close-up, monochrome'",
rich_help_panel="create tag",
),
] = False,
is_no_danbooru_format: Annotated[
bool,
typer.Option(
"--no-danbooru-format",
"-ndf",
is_flag=True,
help="danbooru token format or not. ex. 'bandaid_on_leg, short_hair' -> 'bandaid on leg, short hair'",
rich_help_panel="create tag",
),
] = False,
):
"""Create mask from prompt"""
from animatediff.utils.mask import (create_bg, create_fg, crop_frames,
crop_mask_list, save_crop_info)
from animatediff.utils.mask_animseg import animseg_create_fg
from animatediff.utils.mask_rembg import rembg_create_fg
is_danbooru_format = not is_no_danbooru_format
with_confidence = not without_confidence
if use_animeseg and use_rembg:
raise ValueError("use_animeseg and use_rembg cannot be enabled at the same time")
prepare_sam_hq(low_vram)
prepare_groundingDINO()
prepare_propainter()
if use_animeseg:
prepare_anime_seg()
time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
config_org = stylize_dir.joinpath("prompt.json")
model_config: ModelConfig = get_model_config(config_org)
if frame_dir is None:
frame_dir = stylize_dir / "00_img2img"
if not frame_dir.is_dir():
raise ValueError(f'{frame_dir=} does not exist.')
is_img2img = model_config.img2img_map["enable"] if "enable" in model_config.img2img_map else False
create_mask_list = []
if "create_mask" in model_config.stylize_config:
create_mask_list = model_config.stylize_config["create_mask"]
else:
raise ValueError('model_config.stylize_config["create_mask"] not found')
output_list = []
stylize_frame = sorted(glob.glob( os.path.join(frame_dir, "[0-9]*.png"), recursive=False))
frame_len = len(stylize_frame)
W, H = Image.open(stylize_frame[0]).size
org_frame_size = (H,W)
masked_area = [None for f in range(frame_len)]
if use_rembg:
create_mask_list = ["rembg"]
elif use_animeseg:
create_mask_list = ["anime-segmentation"]
for i,mask_token in enumerate(create_mask_list):
fg_dir = stylize_dir.joinpath(f"fg_{i:02d}_{time_str}")
fg_dir.mkdir(parents=True, exist_ok=True)
create_controlnet_dir( fg_dir / "00_controlnet_image" )
fg_masked_dir = fg_dir / "00_img2img"
fg_masked_dir.mkdir(parents=True, exist_ok=True)
fg_mask_dir = fg_dir / "00_mask"
fg_mask_dir.mkdir(parents=True, exist_ok=True)
if use_animeseg:
masked_area = animseg_create_fg(
frame_dir=frame_dir,
output_dir=fg_masked_dir,
output_mask_dir=fg_mask_dir,
masked_area_list=masked_area,
mask_padding=mask_padding,
bg_color=None if no_gb else (0,255,0),
)
elif use_rembg:
masked_area = rembg_create_fg(
frame_dir=frame_dir,
output_dir=fg_masked_dir,
output_mask_dir=fg_mask_dir,
masked_area_list=masked_area,
mask_padding=mask_padding,
bg_color=None if no_gb else (0,255,0),
)
else:
masked_area = create_fg(
mask_token=mask_token,
frame_dir=frame_dir,
output_dir=fg_masked_dir,
output_mask_dir=fg_mask_dir,
masked_area_list=masked_area,
box_threshold=box_threshold,
text_threshold=text_threshold,
mask_padding=mask_padding,
sam_checkpoint= "data/models/SAM/sam_hq_vit_h.pth" if not low_vram else "data/models/SAM/sam_hq_vit_b.pth",
bg_color=None if no_gb else (0,255,0),
)
if not no_crop:
frame_size_hw = (masked_area[0].shape[1],masked_area[0].shape[2])
cropped_mask_list, mask_pos_list, crop_size_hw = crop_mask_list(masked_area)
logger.info(f"crop fg_masked_dir")
crop_frames(mask_pos_list, crop_size_hw, fg_masked_dir)
logger.info(f"crop fg_mask_dir")
crop_frames(mask_pos_list, crop_size_hw, fg_mask_dir)
save_crop_info(mask_pos_list, crop_size_hw, frame_size_hw, fg_dir / "crop_info.json")
else:
crop_size_hw = None
logger.info(f"mask from [{mask_token}] are output to {fg_dir}")
shutil.copytree(fg_masked_dir, fg_dir / "00_controlnet_image/controlnet_openpose", dirs_exist_ok=True)
#shutil.copytree(fg_masked_dir, fg_dir / "00_controlnet_image/controlnet_ip2p", dirs_exist_ok=True)
if crop_size_hw:
if crop_size_hw[0] == 0 or crop_size_hw[1] == 0:
crop_size_hw = None
output_list.append((fg_dir, crop_size_hw))
torch.cuda.empty_cache()
bg_dir = stylize_dir.joinpath(f"bg_{time_str}")
bg_dir.mkdir(parents=True, exist_ok=True)
create_controlnet_dir( bg_dir / "00_controlnet_image" )
bg_inpaint_dir = bg_dir / "00_img2img"
bg_inpaint_dir.mkdir(parents=True, exist_ok=True)
create_bg(frame_dir, bg_inpaint_dir, masked_area,
use_half = True,
raft_iter = 20,
subvideo_length=80 if not low_vram else 50,
neighbor_length=10 if not low_vram else 8,
ref_stride=10 if not low_vram else 8,
low_vram = low_vram,
)
logger.info(f"background are output to {bg_dir}")
shutil.copytree(bg_inpaint_dir, bg_dir / "00_controlnet_image/controlnet_tile", dirs_exist_ok=True)
shutil.copytree(bg_inpaint_dir, bg_dir / "00_controlnet_image/controlnet_ip2p", dirs_exist_ok=True)
output_list.append((bg_dir,None))
torch.cuda.empty_cache()
black_list = []
if ignore_list.is_file():
with open(ignore_list) as f:
black_list = [s.strip() for s in f.readlines()]
for output, size in output_list:
model_config.prompt_map = get_labels(
frame_dir= output / "00_img2img",
interval=predicte_interval,
general_threshold=general_threshold,
character_threshold=character_threshold,
ignore_tokens=black_list,
with_confidence=with_confidence,
is_danbooru_format=is_danbooru_format,
is_cpu = False,
)
model_config.controlnet_map["input_image_dir"] = os.path.relpath((output / "00_controlnet_image" ).absolute(), data_dir)
model_config.img2img_map["init_img_dir"] = os.path.relpath((output / "00_img2img" ).absolute(), data_dir)
if size is not None:
h, w = size
height = 1024 * (h/(h+w))
width = 1024 * (w/(h+w))
height = int(height//8 * 8)
width = int(width//8 * 8)
model_config.stylize_config["0"]["width"]=width
model_config.stylize_config["0"]["height"]=height
if "1" in model_config.stylize_config:
model_config.stylize_config["1"]["width"]=int(width * 1.25 //8*8)
model_config.stylize_config["1"]["height"]=int(height * 1.25 //8*8)
else:
height, width = org_frame_size
model_config.stylize_config["0"]["width"]=width
model_config.stylize_config["0"]["height"]=height
if "1" in model_config.stylize_config:
model_config.stylize_config["1"]["width"]=int(width * 1.25 //8*8)
model_config.stylize_config["1"]["height"]=int(height * 1.25 //8*8)
save_config_path = output.joinpath("prompt.json")
save_config_path.write_text(model_config.json(indent=4), encoding="utf-8")
@stylize.command(no_args_is_help=True)
def composite(
stylize_dir: Annotated[
Path,
typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to stylize dir"),
] = ...,
box_threshold: Annotated[
float,
typer.Option(
"--box_threshold",
"-b",
min=0.0,
max=1.0,
help="box_threshold",
rich_help_panel="create mask",
),
] = 0.3,
text_threshold: Annotated[
float,
typer.Option(
"--text_threshold",
"-t",
min=0.0,
max=1.0,
help="text_threshold",
rich_help_panel="create mask",
),
] = 0.25,
mask_padding: Annotated[
int,
typer.Option(
"--mask_padding",
"-mp",
min=-100,
max=100,
help="padding pixel value",
rich_help_panel="create mask",
),
] = 0,
use_rembg: Annotated[
bool,
typer.Option(
"--use_rembg",
"-rem",
is_flag=True,
help="use \[rembg] instead of \[Sam+GroundingDINO]",
rich_help_panel="create mask",
),
] = False,
use_animeseg: Annotated[
bool,
typer.Option(
"--use_animeseg",
"-anim",
is_flag=True,
help="use \[anime-segmentation] instead of \[Sam+GroundingDINO]",
rich_help_panel="create mask",
),
] = False,
low_vram: Annotated[
bool,
typer.Option(
"--low_vram",
"-lo",
is_flag=True,
help="low vram mode",
rich_help_panel="create mask/tag",
),
] = False,
is_simple_composite: Annotated[
bool,
typer.Option(
"--simple_composite",
"-si",
is_flag=True,
help="simple composite",
rich_help_panel="composite",
),
] = False,
):
"""composite FG and BG"""
from animatediff.utils.composite import composite, simple_composite
from animatediff.utils.mask import (create_fg, load_frame_list,
load_mask_list, restore_position)
from animatediff.utils.mask_animseg import animseg_create_fg
from animatediff.utils.mask_rembg import rembg_create_fg
if use_animeseg and use_rembg:
raise ValueError("use_animeseg and use_rembg cannot be enabled at the same time")
prepare_sam_hq(low_vram)
if use_animeseg:
prepare_anime_seg()
time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
config_org = stylize_dir.joinpath("prompt.json")
model_config: ModelConfig = get_model_config(config_org)
composite_config = {}
if "composite" in model_config.stylize_config:
composite_config = model_config.stylize_config["composite"]
else:
raise ValueError('model_config.stylize_config["composite"] not found')
save_dir = stylize_dir.joinpath(f"cp_{time_str}")
save_dir.mkdir(parents=True, exist_ok=True)
save_config_path = save_dir.joinpath("prompt.json")
save_config_path.write_text(model_config.json(indent=4), encoding="utf-8")
bg_dir = composite_config["bg_frame_dir"]
bg_dir = Path(bg_dir)
if not bg_dir.is_dir():
raise ValueError('model_config.stylize_config["composite"]["bg_frame_dir"] not valid')
frame_len = len(sorted(glob.glob( os.path.join(bg_dir, "[0-9]*.png"), recursive=False)))
fg_list = composite_config["fg_list"]
for i, fg_param in enumerate(fg_list):
mask_token = fg_param["mask_prompt"]
frame_dir = Path(fg_param["path"])
if not frame_dir.is_dir():
logger.warn(f"{frame_dir=} not valid -> skip")
continue
mask_dir = Path(fg_param["mask_path"])
if not mask_dir.is_dir():
logger.info(f"{mask_dir=} not valid -> create mask")
fg_tmp_dir = save_dir.joinpath(f"fg_{i:02d}_{time_str}")
fg_tmp_dir.mkdir(parents=True, exist_ok=True)
masked_area_list = [None for f in range(frame_len)]
if use_animeseg:
mask_list = animseg_create_fg(
frame_dir=frame_dir,
output_dir=fg_tmp_dir,
output_mask_dir=None,
masked_area_list=masked_area_list,
mask_padding=mask_padding,
)
elif use_rembg:
mask_list = rembg_create_fg(
frame_dir=frame_dir,
output_dir=fg_tmp_dir,
output_mask_dir=None,
masked_area_list=masked_area_list,
mask_padding=mask_padding,
)
else:
mask_list = create_fg(
mask_token=mask_token,
frame_dir=frame_dir,
output_dir=fg_tmp_dir,
output_mask_dir=None,
masked_area_list=masked_area_list,
box_threshold=box_threshold,
text_threshold=text_threshold,
mask_padding=mask_padding,
sam_checkpoint= "data/models/SAM/sam_hq_vit_h.pth" if not low_vram else "data/models/SAM/sam_hq_vit_b.pth",
)
else:
logger.info(f"use {mask_dir=} as mask")
masked_area_list = [None for f in range(frame_len)]
mask_list = load_mask_list(mask_dir, masked_area_list, mask_padding)
mask_list = [ m.transpose([1,2,0]) if m is not None else m for m in mask_list]
crop_info_path = frame_dir.parent.parent / "crop_info.json"
crop_info={}
if crop_info_path.is_file():
with open(crop_info_path, mode="rt", encoding="utf-8") as f:
crop_info = json.load(f)
mask_list = restore_position(mask_list, crop_info)
fg_list = [None for f in range(frame_len)]
fg_list = load_frame_list(frame_dir, fg_list, crop_info)
output_dir = save_dir.joinpath(f"bg_{i:02d}_{time_str}")
output_dir.mkdir(parents=True, exist_ok=True)
if is_simple_composite:
simple_composite(bg_dir, fg_list, output_dir, mask_list)
else:
composite(bg_dir, fg_list, output_dir, mask_list)
bg_dir = output_dir
from animatediff.generate import save_output
frames = sorted(glob.glob( os.path.join(bg_dir, "[0-9]*.png"), recursive=False))
out_images = []
for f in frames:
out_images.append(Image.open(f))
out_file = save_dir.joinpath(f"composite")
save_output(out_images,bg_dir,out_file,model_config.output,True,save_frames=None,save_video=None)
logger.info(f"output to {out_file}")
@stylize.command(no_args_is_help=True)
def create_region(
stylize_dir: Annotated[
Path,
typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to stylize dir"),
] = ...,
frame_dir: Annotated[
Path,
typer.Option(
"--frame_dir",
"-f",
path_type=Path,
file_okay=False,
help="Path to source frames directory. default is 'STYLIZE_DIR/00_img2img'",
),
] = None,
box_threshold: Annotated[
float,
typer.Option(
"--box_threshold",
"-b",
min=0.0,
max=1.0,
help="box_threshold",
rich_help_panel="create mask",
),
] = 0.3,
text_threshold: Annotated[
float,
typer.Option(
"--text_threshold",
"-t",
min=0.0,
max=1.0,
help="text_threshold",
rich_help_panel="create mask",
),
] = 0.25,
mask_padding: Annotated[
int,
typer.Option(
"--mask_padding",
"-mp",
min=-100,
max=100,
help="padding pixel value",
rich_help_panel="create mask",
),
] = 0,
use_rembg: Annotated[
bool,
typer.Option(
"--use_rembg",
"-rem",
is_flag=True,
help="use [rembg] instead of [Sam+GroundingDINO]",
rich_help_panel="create mask",
),
] = False,
use_animeseg: Annotated[
bool,
typer.Option(
"--use_animeseg",
"-anim",
is_flag=True,
help="use [anime-segmentation] instead of [Sam+GroundingDINO]",
rich_help_panel="create mask",
),
] = False,
low_vram: Annotated[
bool,
typer.Option(
"--low_vram",
"-lo",
is_flag=True,
help="low vram mode",
rich_help_panel="create mask/tag",
),
] = False,
ignore_list: Annotated[
Path,
typer.Option(
"--ignore-list",
"-g",
path_type=Path,
dir_okay=False,
exists=True,
help="path to ignore token list file",
rich_help_panel="create tag",
),
] = Path("config/prompts/ignore_tokens.txt"),
predicte_interval: Annotated[
int,
typer.Option(
"--predicte-interval",
"-p",
min=1,
max=120,
help="Interval of frames to be predicted",
rich_help_panel="create tag",
),
] = 1,
general_threshold: Annotated[
float,
typer.Option(
"--threshold",
"-th",
min=0.0,
max=1.0,
help="threshold for general token confidence",
rich_help_panel="create tag",
),
] = 0.35,
character_threshold: Annotated[
float,
typer.Option(
"--threshold2",
"-th2",
min=0.0,
max=1.0,
help="threshold for character token confidence",
rich_help_panel="create tag",
),
] = 0.85,
without_confidence: Annotated[
bool,
typer.Option(
"--no-confidence-format",
"-ncf",
is_flag=True,
help="confidence token format or not. ex. '(close-up:0.57), (monochrome:1.1)' -> 'close-up, monochrome'",
rich_help_panel="create tag",
),
] = False,
is_no_danbooru_format: Annotated[
bool,
typer.Option(
"--no-danbooru-format",
"-ndf",
is_flag=True,
help="danbooru token format or not. ex. 'bandaid_on_leg, short_hair' -> 'bandaid on leg, short hair'",
rich_help_panel="create tag",
),
] = False,
):
"""Create region from prompt"""
from animatediff.utils.mask import create_bg, create_fg
from animatediff.utils.mask_animseg import animseg_create_fg
from animatediff.utils.mask_rembg import rembg_create_fg
is_danbooru_format = not is_no_danbooru_format
with_confidence = not without_confidence
if use_animeseg and use_rembg:
raise ValueError("use_animeseg and use_rembg cannot be enabled at the same time")
prepare_sam_hq(low_vram)
prepare_groundingDINO()
prepare_propainter()
if use_animeseg:
prepare_anime_seg()
time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
config_org = stylize_dir.joinpath("prompt.json")
model_config: ModelConfig = get_model_config(config_org)
if frame_dir is None:
frame_dir = stylize_dir / "00_img2img"
if not frame_dir.is_dir():
raise ValueError(f'{frame_dir=} does not exist.')
create_mask_list = []
if "create_mask" in model_config.stylize_config:
create_mask_list = model_config.stylize_config["create_mask"]
else:
raise ValueError('model_config.stylize_config["create_mask"] not found')
output_list = []
stylize_frame = sorted(glob.glob( os.path.join(frame_dir, "[0-9]*.png"), recursive=False))
frame_len = len(stylize_frame)
masked_area = [None for f in range(frame_len)]
if use_rembg:
create_mask_list = ["rembg"]
elif use_animeseg:
create_mask_list = ["anime-segmentation"]
for i,mask_token in enumerate(create_mask_list):
fg_dir = stylize_dir.joinpath(f"r_fg_{i:02d}_{time_str}")
fg_dir.mkdir(parents=True, exist_ok=True)
fg_masked_dir = fg_dir / "00_tmp_masked"
fg_masked_dir.mkdir(parents=True, exist_ok=True)
fg_mask_dir = fg_dir / "00_mask"
fg_mask_dir.mkdir(parents=True, exist_ok=True)
if use_animeseg:
masked_area = animseg_create_fg(
frame_dir=frame_dir,
output_dir=fg_masked_dir,
output_mask_dir=fg_mask_dir,
masked_area_list=masked_area,
mask_padding=mask_padding,
bg_color=(0,255,0),
)
elif use_rembg:
masked_area = rembg_create_fg(
frame_dir=frame_dir,
output_dir=fg_masked_dir,
output_mask_dir=fg_mask_dir,
masked_area_list=masked_area,
mask_padding=mask_padding,
bg_color=(0,255,0),
)
else:
masked_area = create_fg(
mask_token=mask_token,
frame_dir=frame_dir,
output_dir=fg_masked_dir,
output_mask_dir=fg_mask_dir,
masked_area_list=masked_area,
box_threshold=box_threshold,
text_threshold=text_threshold,
mask_padding=mask_padding,
sam_checkpoint= "data/models/SAM/sam_hq_vit_h.pth" if not low_vram else "data/models/SAM/sam_hq_vit_b.pth",
bg_color=(0,255,0),
)
logger.info(f"mask from [{mask_token}] are output to {fg_dir}")
output_list.append((fg_dir, fg_masked_dir, fg_mask_dir))
torch.cuda.empty_cache()
bg_dir = stylize_dir.joinpath(f"r_bg_{time_str}")
bg_dir.mkdir(parents=True, exist_ok=True)
bg_inpaint_dir = bg_dir / "00_tmp_inpainted"
bg_inpaint_dir.mkdir(parents=True, exist_ok=True)
create_bg(frame_dir, bg_inpaint_dir, masked_area,
use_half = True,
raft_iter = 20,
subvideo_length=80 if not low_vram else 50,
neighbor_length=10 if not low_vram else 8,
ref_stride=10 if not low_vram else 8,
low_vram = low_vram,
)
logger.info(f"background are output to {bg_dir}")
output_list.append((bg_dir,bg_inpaint_dir,None))
torch.cuda.empty_cache()
black_list = []
if ignore_list.is_file():
with open(ignore_list) as f:
black_list = [s.strip() for s in f.readlines()]
black_list.append("simple_background")
black_list.append("green_background")
region_map = {}
for i, (output_root, masked_dir, mask_dir) in enumerate(output_list):
prompt_map = get_labels(
frame_dir= masked_dir,
interval=predicte_interval,
general_threshold=general_threshold,
character_threshold=character_threshold,
ignore_tokens=black_list,
with_confidence=with_confidence,
is_danbooru_format=is_danbooru_format,
is_cpu = False,
)
if mask_dir:
ipadapter_dir = output_root / "00_ipadapter"
ipadapter_dir.mkdir(parents=True, exist_ok=True)
region_map[str(i)]={
"enable": True,
"crop_generation_rate": 0.0,
"mask_dir" : os.path.relpath(mask_dir.absolute(), data_dir),
"save_mask": True,
"is_init_img" : False,
"condition" : {
"prompt_fixed_ratio": 0.5,
"head_prompt": "",
"prompt_map": prompt_map,
"tail_prompt": "",
"ip_adapter_map": {
"enable": True,
"input_image_dir": os.path.relpath(ipadapter_dir.absolute(), data_dir),
"prompt_fixed_ratio": 0.5,
"save_input_image": True,
"resized_to_square": False
}
}
}
else:
region_map["background"]={
"is_init_img" : False,
"hint" : "background's condition refers to the one in root"
}
model_config.prompt_map = prompt_map
model_config.region_map =region_map
config_org.write_text(model_config.json(indent=4), encoding="utf-8")