from diffsynth import ModelManager, SDVideoPipeline, ControlNetConfigUnit, VideoData, save_video
import torch
import spaces
import gradio as gr

# Load models
model_manager = ModelManager(torch_dtype=torch.float16, device="cuda")
model_manager.load_textual_inversions("models/textual_inversion")
model_manager.load_models([
    "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors",
    "models/AnimateDiff/mm_sd_v15_v2.ckpt",
    "models/ControlNet/control_v11p_sd15_lineart.pth",
    "models/ControlNet/control_v11f1e_sd15_tile.pth",
])

# Build the video pipeline with lineart and tile ControlNets
pipe = SDVideoPipeline.from_model_manager(
    model_manager,
    [
        ControlNetConfigUnit(
            processor_id="lineart",
            model_path="models/ControlNet/control_v11p_sd15_lineart.pth",
            scale=0.5
        ),
        ControlNetConfigUnit(
            processor_id="tile",
            model_path="models/ControlNet/control_v11f1e_sd15_tile.pth",
            scale=0.5
        )
    ]
)


@spaces.GPU(duration=500)
def generate_video(inp_vid):
    # Read the input video and use its first 60 frames as ControlNet guidance
    video = VideoData(video_file=inp_vid, height=1024, width=1024)
    input_video = [video[i] for i in range(0, 60)]

    # Toon shading (20G VRAM)
    torch.manual_seed(0)
    output_video = pipe(
        prompt="best quality, perfect anime illustration, light, a girl is dancing, smile, solo",
        negative_prompt="verybadimagenegative_v1.3",
        cfg_scale=3,
        clip_skip=2,
        controlnet_frames=input_video,
        num_frames=len(input_video),
        num_inference_steps=10,
        height=1024,
        width=1024,
        animatediff_batch_size=32,
        animatediff_stride=16,
        vram_limit_level=0,
    )

    # Save video
    save_video(output_video, "output_video.mp4", fps=60)
    return "output_video.mp4"


# Gradio UI
app = gr.Blocks(theme="JohnSmith9982/small_and_pretty")
with app:
    gr.Markdown("#