Spaces:

EXCAI
/

Diffusion-As-Shader

Running on Zero

App Files Files Community

Beijia11 committed 3 days ago

Commit

c4fce07

1 Parent(s): 6ded12b

merge demo.py and app.py

Browse files

Files changed (1) hide show

app.py +272 -159

app.py CHANGED Viewed

@@ -2,20 +2,30 @@ import os
 import sys
 import gradio as gr
 import torch
-import subprocess
 import argparse
-import glob
-import spaces
 project_root = os.path.dirname(os.path.abspath(__file__))
 os.environ["GRADIO_TEMP_DIR"] = os.path.join(project_root, "tmp", "gradio")
 sys.path.append(project_root)
 HERE_PATH = os.path.normpath(os.path.dirname(__file__))
 sys.path.insert(0, HERE_PATH)
 from huggingface_hub import hf_hub_download
 hf_hub_download(repo_id="EXCAI/Diffusion-As-Shader", filename='spatracker/spaT_final.pth', local_dir=f'{HERE_PATH}/checkpoints/')
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Diffusion as Shader Web UI")
@@ -31,21 +41,53 @@ GPU_ID = args.gpu
 DEFAULT_MODEL_PATH = args.model_path
 OUTPUT_DIR = args.output_dir
-# if 'CUDA_HOME' not in os.environ:
-#     for cuda_path in ['/usr/local/cuda', '/usr/cuda', '/opt/cuda']:
-#         if os.path.exists(cuda_path):
-#             os.environ['CUDA_HOME'] = cuda_path
-#             print(cuda_path)
-#             break
-#     if 'CUDA_HOME' not in os.environ:
-#         os.environ['CUDA_HOME'] = '/usr/local/cuda'
-#         print("set default cuda path in: /usr/local/cuda")
 # Create necessary directories
 os.makedirs("outputs", exist_ok=True)
 # Create project tmp directory instead of using system temp
 os.makedirs(os.path.join(project_root, "tmp"), exist_ok=True)
 os.makedirs(os.path.join(project_root, "tmp", "gradio"), exist_ok=True)
 def save_uploaded_file(file):
     if file is None:
@@ -86,59 +128,22 @@ def save_uploaded_file(file):
     return temp_path
-def create_run_command(args):
-    """Create command based on input parameters"""
-    cmd = ["python", "demo.py"]
-    if "prompt" not in args or args["prompt"] is None or args["prompt"] == "":
-        args["prompt"] = ""
-    if "checkpoint_path" not in args or args["checkpoint_path"] is None or args["checkpoint_path"] == "":
-        args["checkpoint_path"] = DEFAULT_MODEL_PATH
-    # 添加调试输出
-    print(f"DEBUG: Command args: {args}")
-    for key, value in args.items():
-        if value is not None:
-            # Handle boolean values correctly - for repaint, we need to pass true/false
-            if isinstance(value, bool):
-                cmd.append(f"--{key}")
-                cmd.append(str(value).lower())  # Convert True/False to true/false
-            else:
-                cmd.append(f"--{key}")
-                cmd.append(str(value))
-    return cmd
-@spaces.GPU(duration=240)
-def run_process(cmd):
-    """Run command and return output"""
-    print(f"Running command: {' '.join(cmd)}")
-    process = subprocess.Popen(
-        cmd,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        universal_newlines=True
-    )
-    output = []
-    for line in iter(process.stdout.readline, ""):
-        print(line, end="")
-        output.append(line)
-        if not line:
-            break
-    process.stdout.close()
-    return_code = process.wait()
-    if return_code:
-        stderr = process.stderr.read()
-        print(f"Error: {stderr}")
-        raise subprocess.CalledProcessError(return_code, cmd, output="\n".join(output), stderr=stderr)
-    return "\n".join(output)
-@spaces.GPU(duration=240)
 def process_motion_transfer(source, prompt, mt_repaint_option, mt_repaint_image):
     """Process video motion transfer task"""
     try:
@@ -150,42 +155,68 @@ def process_motion_transfer(source, prompt, mt_repaint_option, mt_repaint_image)
         print(f"DEBUG: Repaint option: {mt_repaint_option}")
         print(f"DEBUG: Repaint image: {mt_repaint_image}")
-        args = {
-            "input_path": input_video_path,
-            "prompt": f"\"{prompt}\"",
-            "checkpoint_path": DEFAULT_MODEL_PATH,
-            "output_dir": OUTPUT_DIR,
-            "gpu": GPU_ID
-        }
-        # Priority: Custom Image > Yes > No
         if mt_repaint_image is not None:
-            # Custom image takes precedence if provided
             repaint_path = save_uploaded_file(mt_repaint_image)
-            print(f"DEBUG: Repaint path: {repaint_path}")
-            args["repaint"] = repaint_path
         elif mt_repaint_option == "Yes":
-            # Otherwise use Yes/No selection
-            args["repaint"] = "true"
-        # Create and run command
-        cmd = create_run_command(args)
-        output = run_process(cmd)
-        # Find generated video files
-        output_files = glob.glob(os.path.join(OUTPUT_DIR, "*.mp4"))
-        if output_files:
-            # Sort by modification time, return the latest file
-            latest_file = max(output_files, key=os.path.getmtime)
-            return latest_file
         else:
-            return None
     except Exception as e:
         import traceback
         print(f"Processing failed: {str(e)}\n{traceback.format_exc()}")
         return None
-@spaces.GPU(duration=240)
 def process_camera_control(source, prompt, camera_motion, tracking_method):
     """Process camera control task"""
     try:
@@ -197,36 +228,66 @@ def process_camera_control(source, prompt, camera_motion, tracking_method):
         print(f"DEBUG: Camera motion: '{camera_motion}'")
         print(f"DEBUG: Tracking method: '{tracking_method}'")
-        args = {
-            "input_path": input_media_path,
-            "prompt": prompt,
-            "checkpoint_path": DEFAULT_MODEL_PATH,
-            "output_dir": OUTPUT_DIR,
-            "gpu": GPU_ID,
-            "tracking_method": tracking_method
-        }
-        if camera_motion and camera_motion.strip():
-            args["camera_motion"] = camera_motion
-        # Create and run command
-        cmd = create_run_command(args)
-        output = run_process(cmd)
-        # Find generated video files
-        output_files = glob.glob(os.path.join(OUTPUT_DIR, "*.mp4"))
-        if output_files:
-            # Sort by modification time, return the latest file
-            latest_file = max(output_files, key=os.path.getmtime)
-            return latest_file
-        else:
-            return None
     except Exception as e:
         import traceback
         print(f"Processing failed: {str(e)}\n{traceback.format_exc()}")
         return None
-@spaces.GPU(duration=240)
 def process_object_manipulation(source, prompt, object_motion, object_mask, tracking_method):
     """Process object manipulation task"""
     try:
@@ -236,36 +297,90 @@ def process_object_manipulation(source, prompt, object_motion, object_mask, trac
             return None
         object_mask_path = save_uploaded_file(object_mask)
-        args = {
-            "input_path": input_image_path,
-            "prompt": prompt,
-            "checkpoint_path": DEFAULT_MODEL_PATH,
-            "output_dir": OUTPUT_DIR,
-            "gpu": GPU_ID,
-            "object_motion": object_motion,
-            "object_mask": object_mask_path,
-            "tracking_method": tracking_method
-        }
-        # Create and run command
-        cmd = create_run_command(args)
-        output = run_process(cmd)
-        # Find generated video files
-        output_files = glob.glob(os.path.join(OUTPUT_DIR, "*.mp4"))
-        if output_files:
-            # Sort by modification time, return the latest file
-            latest_file = max(output_files, key=os.path.getmtime)
-            return latest_file
         else:
-            return None
     except Exception as e:
         import traceback
         print(f"Processing failed: {str(e)}\n{traceback.format_exc()}")
         return None
-@spaces.GPU(duration=240)
 def process_mesh_animation(source, prompt, tracking_video, ma_repaint_option, ma_repaint_image):
     """Process mesh animation task"""
     try:
@@ -278,36 +393,34 @@ def process_mesh_animation(source, prompt, tracking_video, ma_repaint_option, ma
         if tracking_video_path is None:
             return None
-        args = {
-            "input_path": input_video_path,
-            "prompt": prompt,
-            "checkpoint_path": DEFAULT_MODEL_PATH,
-            "output_dir": OUTPUT_DIR,
-            "gpu": GPU_ID,
-            "tracking_path": tracking_video_path
-        }
-        # Priority: Custom Image > Yes > No
         if ma_repaint_image is not None:
-            # Custom image takes precedence if provided
             repaint_path = save_uploaded_file(ma_repaint_image)
-            args["repaint"] = repaint_path
         elif ma_repaint_option == "Yes":
-            # Otherwise use Yes/No selection
-            args["repaint"] = "true"
-        # Create and run command
-        cmd = create_run_command(args)
-        output = run_process(cmd)
-        # Find generated video files
-        output_files = glob.glob(os.path.join(OUTPUT_DIR, "*.mp4"))
-        if output_files:
-            # Sort by modification time, return the latest file
-            latest_file = max(output_files, key=os.path.getmtime)
-            return latest_file
-        else:
-            return None
     except Exception as e:
         import traceback
         print(f"Processing failed: {str(e)}\n{traceback.format_exc()}")

 import sys
 import gradio as gr
 import torch
 import argparse
+from PIL import Image
+import numpy as np
+import torchvision.transforms as transforms
+from moviepy.editor import VideoFileClip
+from diffusers.utils import load_image, load_video
 project_root = os.path.dirname(os.path.abspath(__file__))
 os.environ["GRADIO_TEMP_DIR"] = os.path.join(project_root, "tmp", "gradio")
 sys.path.append(project_root)
+try:
+    sys.path.append(os.path.join(project_root, "submodules/MoGe"))
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+except:
+    print("Warning: MoGe not found, motion transfer will not be applied")
 HERE_PATH = os.path.normpath(os.path.dirname(__file__))
 sys.path.insert(0, HERE_PATH)
 from huggingface_hub import hf_hub_download
 hf_hub_download(repo_id="EXCAI/Diffusion-As-Shader", filename='spatracker/spaT_final.pth', local_dir=f'{HERE_PATH}/checkpoints/')
+from models.pipelines import DiffusionAsShaderPipeline, FirstFrameRepainter, CameraMotionGenerator, ObjectMotionGenerator
+from submodules.MoGe.moge.model import MoGeModel
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Diffusion as Shader Web UI")
 DEFAULT_MODEL_PATH = args.model_path
 OUTPUT_DIR = args.output_dir
 # Create necessary directories
 os.makedirs("outputs", exist_ok=True)
 # Create project tmp directory instead of using system temp
 os.makedirs(os.path.join(project_root, "tmp"), exist_ok=True)
 os.makedirs(os.path.join(project_root, "tmp", "gradio"), exist_ok=True)
+def load_media(media_path, max_frames=49, transform=None):
+    """Load video or image frames and convert to tensor
+    Args:
+        media_path (str): Path to video or image file
+        max_frames (int): Maximum number of frames to load
+        transform (callable): Transform to apply to frames
+    Returns:
+        Tuple[torch.Tensor, float, bool]: Video tensor [T,C,H,W], FPS, and is_video flag
+    """
+    if transform is None:
+        transform = transforms.Compose([
+            transforms.Resize((480, 720)),
+            transforms.ToTensor()
+        ])
+    # Determine if input is video or image based on extension
+    ext = os.path.splitext(media_path)[1].lower()
+    is_video = ext in ['.mp4', '.avi', '.mov']
+    if is_video:
+        frames = load_video(media_path)
+        fps = len(frames) / VideoFileClip(media_path).duration
+    else:
+        # Handle image as single frame
+        image = load_image(media_path)
+        frames = [image]
+        fps = 8  # Default fps for images
+    # Ensure we have exactly max_frames
+    if len(frames) > max_frames:
+        frames = frames[:max_frames]
+    elif len(frames) < max_frames:
+        last_frame = frames[-1]
+        while len(frames) < max_frames:
+            frames.append(last_frame.copy())
+    # Convert frames to tensor
+    video_tensor = torch.stack([transform(frame) for frame in frames])
+    return video_tensor, fps, is_video
 def save_uploaded_file(file):
     if file is None:
     return temp_path
+das_pipeline = None
+moge_model = None
+def get_das_pipeline():
+    global das_pipeline
+    if das_pipeline is None:
+        das_pipeline = DiffusionAsShaderPipeline(gpu_id=GPU_ID, output_dir=OUTPUT_DIR)
+    return das_pipeline
+def get_moge_model():
+    global moge_model
+    if moge_model is None:
+        das = get_das_pipeline()
+        moge_model = MoGeModel.from_pretrained("Ruicheng/moge-vitl").to(das.device)
+    return moge_model
 def process_motion_transfer(source, prompt, mt_repaint_option, mt_repaint_image):
     """Process video motion transfer task"""
     try:
         print(f"DEBUG: Repaint option: {mt_repaint_option}")
         print(f"DEBUG: Repaint image: {mt_repaint_image}")
+        das = get_das_pipeline()
+        video_tensor, fps, is_video = load_media(input_video_path)
+        if not is_video:
+            tracking_method = "moge"
+            print("Image input detected, using MoGe for tracking video generation.")
+        else:
+            tracking_method = "spatracker"
+        repaint_img_tensor = None
         if mt_repaint_image is not None:
             repaint_path = save_uploaded_file(mt_repaint_image)
+            repaint_img_tensor, _, _ = load_media(repaint_path)
+            repaint_img_tensor = repaint_img_tensor[0]
         elif mt_repaint_option == "Yes":
+            repainter = FirstFrameRepainter(gpu_id=GPU_ID, output_dir=OUTPUT_DIR)
+            repaint_img_tensor = repainter.repaint(
+                video_tensor[0],
+                prompt=prompt,
+                depth_path=None
+            )
+        tracking_tensor = None
+        if tracking_method == "moge":
+            moge = get_moge_model()
+            infer_result = moge.infer(video_tensor[0].to(das.device))  # [C, H, W] in range [0,1]
+            H, W = infer_result["points"].shape[0:2]
+            pred_tracks = infer_result["points"].unsqueeze(0).repeat(49, 1, 1, 1) #[T, H, W, 3]
+            poses = torch.eye(4).unsqueeze(0).repeat(49, 1, 1)
+            pred_tracks_flatten = pred_tracks.reshape(video_tensor.shape[0], H*W, 3)
+            cam_motion = CameraMotionGenerator(None)
+            cam_motion.set_intr(infer_result["intrinsics"])
+            pred_tracks = cam_motion.w2s(pred_tracks_flatten, poses).reshape([video_tensor.shape[0], H, W, 3]) # [T, H, W, 3]
+            _, tracking_tensor = das.visualize_tracking_moge(
+                pred_tracks.cpu().numpy(),
+                infer_result["mask"].cpu().numpy()
+            )
+            print('Export tracking video via MoGe')
         else:
+            pred_tracks, pred_visibility, T_Firsts = das.generate_tracking_spatracker(video_tensor)
+            _, tracking_tensor = das.visualize_tracking_spatracker(video_tensor, pred_tracks, pred_visibility, T_Firsts)
+            print('Export tracking video via SpaTracker')
+        output_path = das.apply_tracking(
+            video_tensor=video_tensor,
+            fps=8,
+            tracking_tensor=tracking_tensor,
+            img_cond_tensor=repaint_img_tensor,
+            prompt=prompt,
+            checkpoint_path=DEFAULT_MODEL_PATH
+        )
+        return output_path
     except Exception as e:
         import traceback
         print(f"Processing failed: {str(e)}\n{traceback.format_exc()}")
         return None
 def process_camera_control(source, prompt, camera_motion, tracking_method):
     """Process camera control task"""
     try:
         print(f"DEBUG: Camera motion: '{camera_motion}'")
         print(f"DEBUG: Tracking method: '{tracking_method}'")
+        das = get_das_pipeline()
+        video_tensor, fps, is_video = load_media(input_media_path)
+        if not is_video and tracking_method == "spatracker":
+            tracking_method = "moge"
+            print("Image input detected with spatracker selected, switching to MoGe")
+        cam_motion = CameraMotionGenerator(camera_motion)
+        repaint_img_tensor = None
+        tracking_tensor = None
+        if tracking_method == "moge":
+            moge = get_moge_model()
+            infer_result = moge.infer(video_tensor[0].to(das.device))  # [C, H, W] in range [0,1]
+            H, W = infer_result["points"].shape[0:2]
+            pred_tracks = infer_result["points"].unsqueeze(0).repeat(49, 1, 1, 1) #[T, H, W, 3]
+            cam_motion.set_intr(infer_result["intrinsics"])
+            if camera_motion:
+                poses = cam_motion.get_default_motion() # shape: [49, 4, 4]
+                print("Camera motion applied")
+            else:
+                poses = torch.eye(4).unsqueeze(0).repeat(49, 1, 1)
+            pred_tracks_flatten = pred_tracks.reshape(video_tensor.shape[0], H*W, 3)
+            pred_tracks = cam_motion.w2s(pred_tracks_flatten, poses).reshape([video_tensor.shape[0], H, W, 3]) # [T, H, W, 3]
+            _, tracking_tensor = das.visualize_tracking_moge(
+                pred_tracks.cpu().numpy(),
+                infer_result["mask"].cpu().numpy()
+            )
+            print('Export tracking video via MoGe')
+        else:
+            pred_tracks, pred_visibility, T_Firsts = das.generate_tracking_spatracker(video_tensor)
+            if camera_motion:
+                poses = cam_motion.get_default_motion() # shape: [49, 4, 4]
+                pred_tracks = cam_motion.apply_motion_on_pts(pred_tracks, poses)
+                print("Camera motion applied")
+            _, tracking_tensor = das.visualize_tracking_spatracker(video_tensor, pred_tracks, pred_visibility, T_Firsts)
+            print('Export tracking video via SpaTracker')
+        output_path = das.apply_tracking(
+            video_tensor=video_tensor,
+            fps=8,
+            tracking_tensor=tracking_tensor,
+            img_cond_tensor=repaint_img_tensor,
+            prompt=prompt,
+            checkpoint_path=DEFAULT_MODEL_PATH
+        )
+        return output_path
     except Exception as e:
         import traceback
         print(f"Processing failed: {str(e)}\n{traceback.format_exc()}")
         return None
 def process_object_manipulation(source, prompt, object_motion, object_mask, tracking_method):
     """Process object manipulation task"""
     try:
             return None
         object_mask_path = save_uploaded_file(object_mask)
+        if object_mask_path is None:
+            print("Object mask not provided")
+            return None
+        das = get_das_pipeline()
+        video_tensor, fps, is_video = load_media(input_image_path)
+        if not is_video and tracking_method == "spatracker":
+            tracking_method = "moge"
+            print("Image input detected with spatracker selected, switching to MoGe")
+        mask_image = Image.open(object_mask_path).convert('L')
+        mask_image = transforms.Resize((480, 720))(mask_image)
+        mask = torch.from_numpy(np.array(mask_image) > 127)
+        motion_generator = ObjectMotionGenerator(device=das.device)
+        repaint_img_tensor = None
+        tracking_tensor = None
+        if tracking_method == "moge":
+            moge = get_moge_model()
+            infer_result = moge.infer(video_tensor[0].to(das.device))  # [C, H, W] in range [0,1]
+            H, W = infer_result["points"].shape[0:2]
+            pred_tracks = infer_result["points"].unsqueeze(0).repeat(49, 1, 1, 1) #[T, H, W, 3]
+            pred_tracks = motion_generator.apply_motion(
+                pred_tracks=pred_tracks,
+                mask=mask,
+                motion_type=object_motion,
+                distance=50,
+                num_frames=49,
+                tracking_method="moge"
+            )
+            print(f"Object motion '{object_motion}' applied using provided mask")
+            poses = torch.eye(4).unsqueeze(0).repeat(49, 1, 1)
+            pred_tracks_flatten = pred_tracks.reshape(video_tensor.shape[0], H*W, 3)
+            cam_motion = CameraMotionGenerator(None)
+            cam_motion.set_intr(infer_result["intrinsics"])
+            pred_tracks = cam_motion.w2s(pred_tracks_flatten, poses).reshape([video_tensor.shape[0], H, W, 3]) # [T, H, W, 3]
+            _, tracking_tensor = das.visualize_tracking_moge(
+                pred_tracks.cpu().numpy(),
+                infer_result["mask"].cpu().numpy()
+            )
+            print('Export tracking video via MoGe')
         else:
+            pred_tracks, pred_visibility, T_Firsts = das.generate_tracking_spatracker(video_tensor)
+            pred_tracks = motion_generator.apply_motion(
+                pred_tracks=pred_tracks.squeeze(),
+                mask=mask,
+                motion_type=object_motion,
+                distance=50,
+                num_frames=49,
+                tracking_method="spatracker"
+            ).unsqueeze(0)
+            print(f"Object motion '{object_motion}' applied using provided mask")
+            _, tracking_tensor = das.visualize_tracking_spatracker(video_tensor, pred_tracks, pred_visibility, T_Firsts)
+            print('Export tracking video via SpaTracker')
+        output_path = das.apply_tracking(
+            video_tensor=video_tensor,
+            fps=8,
+            tracking_tensor=tracking_tensor,
+            img_cond_tensor=repaint_img_tensor,
+            prompt=prompt,
+            checkpoint_path=DEFAULT_MODEL_PATH
+        )
+        return output_path
     except Exception as e:
         import traceback
         print(f"Processing failed: {str(e)}\n{traceback.format_exc()}")
         return None
 def process_mesh_animation(source, prompt, tracking_video, ma_repaint_option, ma_repaint_image):
     """Process mesh animation task"""
     try:
         if tracking_video_path is None:
             return None
+        das = get_das_pipeline()
+        video_tensor, fps, is_video = load_media(input_video_path)
+        tracking_tensor, tracking_fps, _ = load_media(tracking_video_path)
+        repaint_img_tensor = None
         if ma_repaint_image is not None:
             repaint_path = save_uploaded_file(ma_repaint_image)
+            repaint_img_tensor, _, _ = load_media(repaint_path)
+            repaint_img_tensor = repaint_img_tensor[0]  # 获取第一帧
         elif ma_repaint_option == "Yes":
+            repainter = FirstFrameRepainter(gpu_id=GPU_ID, output_dir=OUTPUT_DIR)
+            repaint_img_tensor = repainter.repaint(
+                video_tensor[0],
+                prompt=prompt,
+                depth_path=None
+            )
+        output_path = das.apply_tracking(
+            video_tensor=video_tensor,
+            fps=8,
+            tracking_tensor=tracking_tensor,
+            img_cond_tensor=repaint_img_tensor,
+            prompt=prompt,
+            checkpoint_path=DEFAULT_MODEL_PATH
+        )
+        return output_path
     except Exception as e:
         import traceback
         print(f"Processing failed: {str(e)}\n{traceback.format_exc()}")