import subprocess subprocess.run( 'pip install numpy==1.26.4', shell=True ) import os import gradio as gr import torch import spaces import random from PIL import Image import numpy as np from glob import glob from pathlib import Path from typing import Optional #Core functions from https://github.com/modelscope/DiffSynth-Studio from diffsynth import save_video, ModelManager, SVDVideoPipeline from diffsynth import SDVideoPipeline, ControlNetConfigUnit, VideoData, save_frames from diffsynth.extensions.RIFE import RIFESmoother import requests def download_model(url, file_path): model_file = requests.get(url, allow_redirects=True) with open(file_path, "wb") as f: f.write(model_file.content) download_model("https://civitai.com/api/download/models/266360?type=Model&format=SafeTensor&size=pruned&fp=fp16", "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors") download_model("https://huggingface.co./guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt", "models/AnimateDiff/mm_sd_v15_v2.ckpt") download_model("https://huggingface.co./lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth", "models/ControlNet/control_v11p_sd15_lineart.pth") download_model("https://huggingface.co./lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth", "models/ControlNet/control_v11f1e_sd15_tile.pth") download_model("https://huggingface.co./lllyasviel/Annotators/resolve/main/sk_model.pth", "models/Annotators/sk_model.pth") download_model("https://huggingface.co./lllyasviel/Annotators/resolve/main/sk_model2.pth", "models/Annotators/sk_model2.pth") download_model("https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16", "models/textual_inversion/verybadimagenegative_v1.3.pt") HF_TOKEN = os.environ.get("HF_TOKEN", None) # Constants MAX_SEED = np.iinfo(np.int32).max CSS = """ footer { visibility: hidden; } """ JS = """function () { gradioURL = window.location.href if (!gradioURL.endsWith('?__theme=dark')) { window.location.replace(gradioURL + '?__theme=dark'); } }""" # Ensure model and scheduler are initialized in GPU-enabled function if torch.cuda.is_available(): model_manager = ModelManager( torch_dtype=torch.float16, device="cuda", model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"], downloading_priority=["HuggingFace"]) pipe = SVDVideoPipeline.from_model_manager(model_manager) model_manager2 = ModelManager(torch_dtype=torch.float16, device="cuda") model_manager2.load_textual_inversions("models/textual_inversion") model_manager2.load_models([ "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors", "models/AnimateDiff/mm_sd_v15_v2.ckpt", "models/ControlNet/control_v11p_sd15_lineart.pth", "models/ControlNet/control_v11f1e_sd15_tile.pth", "models/RIFE/flownet.pkl" ]) pipe2 = SDVideoPipeline.from_model_manager( model_manager2, [ ControlNetConfigUnit( processor_id="lineart", model_path="models/ControlNet/control_v11p_sd15_lineart.pth", scale=0.5 ), ControlNetConfigUnit( processor_id="tile", model_path="models/ControlNet/control_v11f1e_sd15_tile.pth", scale=0.5 ) ] ) smoother = RIFESmoother.from_model_manager(model_manager2) def change_media(image_in, video_in, selected): if selected == "ExVideo": return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False) elif selected == "Diffutoon": return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True) def update_frames(video_in): up_video = VideoData( video_file=video_in) frame_len = len(up_video) return gr.update(maximum=frame_len) @spaces.GPU(duration=120) def generate( video_in, image_in, selected, prompt: str = "best quality", seed: int = -1, num_inference_steps: int = 10, num_frames: int = 30, height: int = 512, width: int = 512, animatediff_batch_size: int = 32, animatediff_stride: int = 16, motion_bucket_id: int = 127, fps_id: int = 25, output_folder: str = "outputs", progress=gr.Progress(track_tqdm=True)): video = "" if seed == -1: seed = random.randint(0, MAX_SEED) torch.manual_seed(seed) os.makedirs(output_folder, exist_ok=True) base_count = len(glob(os.path.join(output_folder, "*.mp4"))) video_path = os.path.join(output_folder, f"{base_count:06d}.mp4") if selected == "ExVideo" and image_in: image = Image.open(image_in) video = pipe( input_image=image.resize((width, height)), num_frames=num_frames, fps=fps_id, height=height, width=width, motion_bucket_id=motion_bucket_id, num_inference_steps=num_inference_steps, min_cfg_scale=2, max_cfg_scale=2, contrast_enhance_scale=1.2 ) model_manager.to("cpu") elif selected == "Diffutoon" and video_in: up_video = VideoData( video_file=video_in, height=height, width=width) input_video = [up_video[i] for i in range(1, num_frames)] video = pipe2( prompt=prompt, negative_prompt="verybadimagenegative_v1.3", cfg_scale=3, clip_skip=2, controlnet_frames=input_video, num_frames=len(input_video), num_inference_steps=num_inference_steps, height=height, width=width, animatediff_batch_size=animatediff_batch_size, animatediff_stride=animatediff_stride, vram_limit_level=0, ) video = smoother(video) save_video(video, video_path, fps=fps_id) return video_path, seed examples = [ ['./walking.mp4', None, "Diffutoon", "A woman walking on the street"], ['./smilegirl.mp4', None, "Diffutoon", "A girl stand on the grass"], ['./working.mp4', None, "Diffutoon", "A woman is doing the dishes"], [None, "./train.jpg", "ExVideo", ""], [None, "./girl.webp", "ExVideo", ""], [None, "./robo.jpg", "ExVideo", ""], ] # Gradio Interface with gr.Blocks(css=CSS, js=JS, theme="soft") as demo: gr.HTML("