ostapagon committed
Commit 6db5fd9 · 1 Parent(s): 036b7d1

Add demo file. Change sdk to gradio. Add wild-gaussian-splatting submodule

.gitmodules ADDED
@@ -0,0 +1,4 @@
+[submodule "wild-gaussian-splatting"]
+	path = wild-gaussian-splatting
+	url = https://github.com/ostapagon/wild-gaussian-splatting.git
+	branch = mast3r_3dgs
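The submodule is pinned to the mast3r_3dgs branch, so a fresh checkout needs the submodule fetched before app.py can import from it. A minimal sketch of triggering that from Python (the plain `git submodule update --init --recursive` CLI call is equivalent; running from the Space's checkout root is an assumption):

import subprocess

# Fetch wild-gaussian-splatting (and its nested submodules) at the pinned commit
subprocess.run(
    ["git", "submodule", "update", "--init", "--recursive"],
    cwd=".",  # assumed: the root of this Space's checkout
    check=True,
)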
README.md CHANGED
@@ -1,9 +1,9 @@
 ---
-title: Mast3r 3dgs
+title: MASt3r+3DGS
 emoji: 😻
 colorFrom: gray
 colorTo: indigo
-sdk: docker
+sdk: gradio
 pinned: false
 ---
app.py ADDED
@@ -0,0 +1,38 @@
+import sys
+sys.path.append('wild-gaussian-splatting/mast3r/')
+sys.path.append('demo/')
+
+import os
+import tempfile
+import gradio as gr
+from mast3r.demo import get_args_parser
+from mast3r.utils.misc import hash_md5
+from mast3r_demo import mast3r_demo_tab
+from gs_demo import gs_demo_tab
+
+if __name__ == '__main__':
+    parser = get_args_parser()
+    args = parser.parse_args()
+
+    if args.server_name is not None:
+        server_name = args.server_name
+    else:
+        server_name = '0.0.0.0' if args.local_network else '127.0.0.1'
+
+    weights_path = args.weights if args.weights is not None else "naver/" + args.model_name
+    chkpt_tag = hash_md5(weights_path)
+
+    with tempfile.TemporaryDirectory(suffix='demo') as tmpdirname:
+        cache_path = os.path.join(tmpdirname, chkpt_tag)
+        os.makedirs(cache_path, exist_ok=True)
+
+        with gr.Blocks() as demo:
+            with gr.Tabs():
+                with gr.Tab("MASt3R Demo"):
+                    mast3r_demo_tab(cache_path, weights_path, args.device)
+                with gr.Tab("Gaussian Splatting Demo"):
+                    gs_demo_tab(cache_path)
+
+        demo.launch(server_name=server_name, server_port=args.server_port)
+
+# python3 demo.py --weights "/app/mast3r/checkpoints/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth" --device "cuda" --server_port 3334 --local_network "$@"
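The two tab builders only need an open Gradio context to attach their components to, so the tab layout can be smoke-tested without downloading the MASt3R checkpoint. A minimal sketch with placeholder tabs (stub_tab is hypothetical; the real builders are mast3r_demo_tab and gs_demo_tab):

import gradio as gr

def stub_tab(name):
    # Stand-in for mast3r_demo_tab / gs_demo_tab, which build their UI into the enclosing Tab
    gr.Markdown(f"{name} placeholder")

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("MASt3R Demo"):
            stub_tab("MASt3R")
        with gr.Tab("Gaussian Splatting Demo"):
            stub_tab("3DGS")

demo.launch(server_name="127.0.0.1", server_port=7860)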
demo/__init__.py ADDED
File without changes
demo/gs_demo.py ADDED
@@ -0,0 +1,148 @@
+import gradio as gr
+from gs_train import train
+import os
+
+DATASET_DIR = "colmap_data"
+
+def get_dataset_folders(datasets_path):
+    try:
+        return [f for f in os.listdir(datasets_path) if os.path.isdir(os.path.join(datasets_path, f))]
+    except FileNotFoundError:
+        return []
+
+def gs_demo_tab(cache_path):
+    datasets_path = "/app/data/scenes/"
+    # dataset_path = os.path.join(cache_path, DATASET_DIR)
+    def start_training(selected_folder, *args):
+        selected_data_path = os.path.join(datasets_path, selected_folder)
+        return train(selected_data_path, *args)
+
+    def get_context():
+        return gr.Blocks(delete_cache=(True, True))
+
+    with get_context() as gs_demo:
+        gr.Markdown("""
+        <style>
+        .fixed-size-video video {
+            max-height: 400px !important;
+            height: 400px !important;
+            object-fit: contain;
+        }
+        </style>
+        """)
+        gr.Markdown("# Gaussian Splatting Training Demo")
+
+        refresh_button = gr.Button("Refresh Datasets", elem_classes="refresh-button")
+        dataset_dropdown = gr.Dropdown(label="Select Dataset", choices=[], value="")
+
+        def update_dataset_dropdown():
+            print("update_dataset_dropdown, cache_path", cache_path)
+            # Update the dataset folders list
+            dataset_folders = get_dataset_folders(datasets_path)
+            print("dataset_folders", dataset_folders)
+            # Only set a default value if there are folders available
+            default_value = dataset_folders[0] if dataset_folders else None
+            return gr.Dropdown(label="Select Dataset", choices=dataset_folders, value=default_value)
+
+        # Update the dropdown when the refresh button is clicked
+        refresh_button.click(fn=update_dataset_dropdown, inputs=None, outputs=dataset_dropdown)
+
+        with gr.Accordion("Model Parameters", open=False):
+            with gr.Row():
+                with gr.Column():
+                    sh_degree = gr.Number(label="SH Degree", value=3)
+                    model_path = gr.Textbox(label="Model Path", value="")
+                    images = gr.Textbox(label="Images", value="images")
+                    resolution = gr.Number(label="Resolution", value=-1)
+                    white_background = gr.Checkbox(label="White Background", value=True)
+                    data_device = gr.Dropdown(label="Data Device", choices=["cuda", "cpu"], value="cuda")
+                    eval = gr.Checkbox(label="Eval", value=False)
+
+        with gr.Accordion("Pipeline Parameters", open=False):
+            with gr.Row():
+                with gr.Column():
+                    convert_SHs_python = gr.Checkbox(label="Convert SHs Python", value=False)
+                    compute_cov3D_python = gr.Checkbox(label="Compute Cov3D Python", value=False)
+                    debug = gr.Checkbox(label="Debug", value=False)
+
+        with gr.Accordion("Optimization Parameters", open=False):
+            with gr.Row():
+                with gr.Column():
+                    iterations = gr.Number(label="Iterations", value=1000)
+                    position_lr_init = gr.Number(label="Position LR Init", value=0.00016)
+                    position_lr_final = gr.Number(label="Position LR Final", value=0.0000016)
+                    position_lr_delay_mult = gr.Number(label="Position LR Delay Mult", value=0.01)
+                    position_lr_max_steps = gr.Number(label="Position LR Max Steps", value=30000)
+                with gr.Column():
+                    feature_lr = gr.Number(label="Feature LR", value=0.0025)
+                    opacity_lr = gr.Number(label="Opacity LR", value=0.05)
+                    scaling_lr = gr.Number(label="Scaling LR", value=0.005)
+                    rotation_lr = gr.Number(label="Rotation LR", value=0.001)
+                    percent_dense = gr.Number(label="Percent Dense", value=0.01)
+                with gr.Column():
+                    lambda_dssim = gr.Number(label="Lambda DSSIM", value=0.2)
+                    densification_interval = gr.Number(label="Densification Interval", value=100)
+                    opacity_reset_interval = gr.Number(label="Opacity Reset Interval", value=3000)
+                    densify_from_iter = gr.Number(label="Densify From Iter", value=500)
+                    densify_until_iter = gr.Number(label="Densify Until Iter", value=15000)
+                    densify_grad_threshold = gr.Number(label="Densify Grad Threshold", value=0.0002)
+                    random_background = gr.Checkbox(label="Random Background", value=False)
+
+        start_button = gr.Button("Start Training")
+
+        # State variable to store the trained model path
+        model_path_state = gr.State()
+
+        # Video output and load-model button with fixed scale
+        video_output = gr.Video(
+            label="Training Progress",
+            height=400,  # Fixed height
+            width="100%",  # Full width of container
+            autoplay=False,  # Prevent autoplay
+            show_label=True,
+            container=True,
+            elem_classes="fixed-size-video"  # Custom class targeted by the CSS above
+        )
+        load_model_button = gr.Button("Load 3D Model", interactive=False)
+        output = gr.Model3D(label="3D Model Output", visible=False)
+
+        def handle_training_complete(selected_folder, *args):
+            # Construct the full path to the selected dataset
+            selected_data_path = os.path.join(datasets_path, selected_folder)
+            # Call the training function with the full path
+            video_path, model_path = train(selected_data_path, *args)
+            # Then return all required outputs
+            return [
+                video_path,  # video output
+                gr.Button(value="Load 3D Model", interactive=True),  # enable the load button
+                gr.Model3D(visible=False),  # keep 3D model hidden
+                model_path  # store model path in state
+            ]
+
+        def load_model(model_path):
+            if not model_path:
+                return gr.Model3D(visible=False)
+            return gr.Model3D(value=model_path, visible=True)
+
+        # Connect the start training button
+        start_button.click(
+            fn=handle_training_complete,
+            inputs=[
+                dataset_dropdown, sh_degree, model_path, images, resolution, white_background, data_device, eval,
+                convert_SHs_python, compute_cov3D_python, debug,
+                iterations, position_lr_init, position_lr_final, position_lr_delay_mult,
+                position_lr_max_steps, feature_lr, opacity_lr, scaling_lr, rotation_lr,
+                percent_dense, lambda_dssim, densification_interval, opacity_reset_interval,
+                densify_from_iter, densify_until_iter, densify_grad_threshold, random_background
+            ],
+            outputs=[video_output, load_model_button, output, model_path_state]
+        )
+
+        # Connect the load model button
+        load_model_button.click(
+            fn=load_model,
+            inputs=[model_path_state],
+            outputs=output
+        )
+    return gs_demo
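The refresh flow above relies on a Gradio 4.x idiom: a callback can return a new gr.Dropdown(...) to replace the choices and value of an existing component. A self-contained sketch of just that mechanism (the folder names are placeholders standing in for get_dataset_folders):

import gradio as gr

def refresh_choices():
    folders = ["scene_a", "scene_b"]  # placeholder for get_dataset_folders(datasets_path)
    return gr.Dropdown(choices=folders, value=folders[0] if folders else None)

with gr.Blocks() as ui:
    dropdown = gr.Dropdown(label="Select Dataset", choices=[])
    gr.Button("Refresh").click(fn=refresh_choices, inputs=None, outputs=dropdown)

ui.launch()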
demo/gs_train.py ADDED
@@ -0,0 +1,289 @@
+import sys
+import os
+import torch
+from random import randint
+import uuid
+from tqdm.auto import tqdm
+import gradio as gr
+import importlib.util
+
+# Add the path to the gaussian-splatting repository
+gaussian_splatting_path = 'wild-gaussian-splatting/gaussian-splatting/'
+sys.path.append(gaussian_splatting_path)
+
+# Import necessary modules from the gaussian-splatting directory
+from utils.loss_utils import l1_loss, ssim
+from gaussian_renderer import render, network_gui
+from scene import Scene, GaussianModel
+from utils.general_utils import safe_state
+from utils.image_utils import psnr
+
+# Dynamically import the train module from the gaussian-splatting directory
+train_spec = importlib.util.spec_from_file_location("gaussian_splatting_train", os.path.join(gaussian_splatting_path, "train.py"))
+gaussian_splatting_train = importlib.util.module_from_spec(train_spec)
+train_spec.loader.exec_module(gaussian_splatting_train)
+
+# Import the necessary functions from the dynamically loaded module
+prepare_output_and_logger = gaussian_splatting_train.prepare_output_and_logger
+training_report = gaussian_splatting_train.training_report
+
+from dataclasses import dataclass, field
+
+@dataclass
+class PipelineParams:
+    convert_SHs_python: bool = False
+    compute_cov3D_python: bool = False
+    debug: bool = False
+
+@dataclass
+class OptimizationParams:
+    iterations: int = 7000
+    position_lr_init: float = 0.00016
+    position_lr_final: float = 0.0000016
+    position_lr_delay_mult: float = 0.01
+    position_lr_max_steps: int = 30_000
+    feature_lr: float = 0.0025
+    opacity_lr: float = 0.05
+    scaling_lr: float = 0.005
+    rotation_lr: float = 0.001
+    percent_dense: float = 0.01
+    lambda_dssim: float = 0.2
+    densification_interval: int = 100
+    opacity_reset_interval: int = 3000
+    densify_from_iter: int = 500
+    densify_until_iter: int = 15_000
+    densify_grad_threshold: float = 0.0002
+    random_background: bool = False
+
+@dataclass
+class ModelParams:
+    sh_degree: int = 3
+    source_path: str = "../data/scenes/turtle/"  # Default path, adjust as needed
+    model_path: str = ""
+    images: str = "images"
+    resolution: int = -1
+    white_background: bool = True
+    data_device: str = "cuda"
+    eval: bool = False
+
+@dataclass
+class TrainingArgs:
+    ip: str = "0.0.0.0"
+    port: int = 6007
+    debug_from: int = -1
+    detect_anomaly: bool = False
+    test_iterations: list[int] = field(default_factory=lambda: [7_000, 30_000])
+    save_iterations: list[int] = field(default_factory=lambda: [7_000, 30_000])
+    quiet: bool = False
+    checkpoint_iterations: list[int] = field(default_factory=lambda: [7_000, 15_000, 30_000])
+    start_checkpoint: str = None
+
+def train(
+    data_source_path, sh_degree, model_path, images, resolution, white_background, data_device, eval,
+    convert_SHs_python, compute_cov3D_python, debug,
+    iterations, position_lr_init, position_lr_final, position_lr_delay_mult,
+    position_lr_max_steps, feature_lr, opacity_lr, scaling_lr, rotation_lr,
+    percent_dense, lambda_dssim, densification_interval, opacity_reset_interval,
+    densify_from_iter, densify_until_iter, densify_grad_threshold, random_background
+):
+    print(data_source_path)
+    # Create instances of the parameter dataclasses
+    dataset = ModelParams(
+        sh_degree=sh_degree,
+        source_path=data_source_path,
+        model_path=model_path,
+        images=images,
+        resolution=resolution,
+        white_background=white_background,
+        data_device=data_device,
+        eval=eval
+    )
+
+    pipe = PipelineParams(
+        convert_SHs_python=convert_SHs_python,
+        compute_cov3D_python=compute_cov3D_python,
+        debug=debug
+    )
+
+    opt = OptimizationParams(
+        iterations=iterations,
+        position_lr_init=position_lr_init,
+        position_lr_final=position_lr_final,
+        position_lr_delay_mult=position_lr_delay_mult,
+        position_lr_max_steps=position_lr_max_steps,
+        feature_lr=feature_lr,
+        opacity_lr=opacity_lr,
+        scaling_lr=scaling_lr,
+        rotation_lr=rotation_lr,
+        percent_dense=percent_dense,
+        lambda_dssim=lambda_dssim,
+        densification_interval=densification_interval,
+        opacity_reset_interval=opacity_reset_interval,
+        densify_from_iter=densify_from_iter,
+        densify_until_iter=densify_until_iter,
+        densify_grad_threshold=densify_grad_threshold,
+        random_background=random_background
+    )
+
+    args = TrainingArgs()
+
+    testing_iterations = args.test_iterations
+    saving_iterations = args.save_iterations
+    checkpoint_iterations = args.checkpoint_iterations
+    debug_from = args.debug_from
+
+    tb_writer = prepare_output_and_logger(dataset)
+
+    gaussians = GaussianModel(dataset.sh_degree)
+    scene = Scene(dataset, gaussians)
+    gaussians.training_setup(opt)
+
+    bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
+    background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
+
+    iter_start = torch.cuda.Event(enable_timing=True)
+    iter_end = torch.cuda.Event(enable_timing=True)
+
+    viewpoint_stack = None
+    ema_loss_for_log = 0.0
+    first_iter = 0
+    progress_bar = tqdm(range(first_iter, opt.iterations), desc="Training progress")
+    first_iter += 1
+
+    point_cloud_path = ""
+    progress = gr.Progress()  # Initialize the Gradio progress bar
+    for iteration in range(first_iter, opt.iterations + 1):
+        iter_start.record()
+        gaussians.update_learning_rate(iteration)
+
+        # Every 1000 iterations we increase the levels of SH up to a maximum degree
+        if iteration % 1000 == 0:
+            gaussians.oneupSHdegree()
+
+        # Pick a random camera
+        if not viewpoint_stack:
+            viewpoint_stack = scene.getTrainCameras().copy()
+        viewpoint_cam = viewpoint_stack.pop(randint(0, len(viewpoint_stack) - 1))
+
+        # Render
+        if (iteration - 1) == debug_from:
+            pipe.debug = True
+        bg = torch.rand((3), device="cuda") if opt.random_background else background
+
+        render_pkg = render(viewpoint_cam, gaussians, pipe, bg)
+        image, viewspace_point_tensor, visibility_filter, radii = render_pkg["render"], render_pkg["viewspace_points"], render_pkg["visibility_filter"], render_pkg["radii"]
+
+        # Loss
+        gt_image = viewpoint_cam.original_image.cuda()
+        Ll1 = l1_loss(image, gt_image)
+        loss = (1.0 - opt.lambda_dssim) * Ll1 + opt.lambda_dssim * (1.0 - ssim(image, gt_image))
+        loss.backward()
+        iter_end.record()
+
+        with torch.no_grad():
+            # Progress bar
+            ema_loss_for_log = 0.4 * loss.item() + 0.6 * ema_loss_for_log
+            if iteration % 10 == 0:
+                progress_bar.set_postfix({"Loss": f"{ema_loss_for_log:.{7}f}"})
+                progress_bar.update(10)
+                progress(iteration / opt.iterations)  # Update Gradio progress bar
+            if iteration == opt.iterations:
+                progress_bar.close()
+
+            # Log and save
+            training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background))
+            if (iteration == opt.iterations):
+                point_cloud_path = os.path.join(os.path.join(dataset.model_path, "point_cloud/iteration_{}".format(iteration)), "point_cloud.ply")
+                print("\n[ITER {}] Saving Gaussians to {}".format(iteration, point_cloud_path))
+                scene.save(iteration)
+
+            # Densification
+            if iteration < opt.densify_until_iter:
+                # Keep track of max radii in image-space for pruning
+                gaussians.max_radii2D[visibility_filter] = torch.max(gaussians.max_radii2D[visibility_filter], radii[visibility_filter])
+                gaussians.add_densification_stats(viewspace_point_tensor, visibility_filter)
+
+                if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0:
+                    size_threshold = 20 if iteration > opt.opacity_reset_interval else None
+                    gaussians.densify_and_prune(opt.densify_grad_threshold, 0.005, scene.cameras_extent, size_threshold)
+
+                if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter):
+                    gaussians.reset_opacity()
+
+            # Optimizer step
+            if iteration < opt.iterations:
+                gaussians.optimizer.step()
+                gaussians.optimizer.zero_grad(set_to_none=True)
+
+            if (iteration == opt.iterations):
+                print("\n[ITER {}] Saving Checkpoint".format(iteration))
+                torch.save((gaussians.capture(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth")
+
+    from os import makedirs
+    from utils.graphics_utils import focal2fov, fov2focal, getProjectionMatrix
+    import torchvision
+    import subprocess
+
+    @torch.no_grad()
+    def render_path(dataset: ModelParams, iteration: int, pipeline: PipelineParams, render_resize_method='crop'):
+        """
+        render_resize_method: crop, pad
+        """
+        gaussians = GaussianModel(dataset.sh_degree)
+        scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False)
+
+        iteration = scene.loaded_iter
+
+        bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
+        background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
+
+        model_path = dataset.model_path
+        name = "render"
+
+        views = scene.getRenderCameras()
+
+        render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders")
+        makedirs(render_path, exist_ok=True)
+
+        for idx, view in enumerate(tqdm(views, desc="Rendering progress")):
+            if render_resize_method == 'crop':
+                image_size = 256
+            elif render_resize_method == 'pad':
+                image_size = max(view.image_width, view.image_height)
+            else:
+                raise NotImplementedError
+            view.original_image = torch.zeros((3, image_size, image_size), device=view.original_image.device)
+            focal_length_x = fov2focal(view.FoVx, view.image_width)
+            focal_length_y = fov2focal(view.FoVy, view.image_height)
+            view.image_width = image_size
+            view.image_height = image_size
+            view.FoVx = focal2fov(focal_length_x, image_size)
+            view.FoVy = focal2fov(focal_length_y, image_size)
+            view.projection_matrix = getProjectionMatrix(znear=view.znear, zfar=view.zfar, fovX=view.FoVx, fovY=view.FoVy).transpose(0, 1).cuda().float()
+            view.full_proj_transform = (view.world_view_transform.unsqueeze(0).bmm(view.projection_matrix.unsqueeze(0))).squeeze(0)
+
+            render_pkg = render(view, gaussians, pipeline, background)
+            rendering = render_pkg["render"]
+            torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png"))
+
+        # Use ffmpeg to assemble the rendered frames into a video
+        renders_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders.mp4")
+        subprocess.run(["ffmpeg", "-y",
+                        "-framerate", "24",
+                        "-i", os.path.join(render_path, "%05d.png"),
+                        "-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2",
+                        "-c:v", "libx264",
+                        "-pix_fmt", "yuv420p",
+                        "-crf", "23",
+                        renders_path], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+                       )
+        return renders_path
+
+    renders_path = render_path(dataset, opt.iterations, pipe, render_resize_method='crop')
+
+    return renders_path, point_cloud_path
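train() takes its 28 arguments positionally, in exactly the order the Gradio inputs list passes them. A hypothetical invocation with the UI defaults (the scene path assumes the Docker layout used by this Space):

from gs_train import train

video_path, ply_path = train(
    "/app/data/scenes/turtle",                   # data_source_path (assumed example scene)
    3, "", "images", -1, True, "cuda", False,    # ModelParams: sh_degree .. eval
    False, False, False,                         # PipelineParams: convert_SHs / compute_cov3D / debug
    1000, 0.00016, 0.0000016, 0.01, 30000,       # iterations + position LR schedule
    0.0025, 0.05, 0.005, 0.001, 0.01,            # feature/opacity/scaling/rotation LR, percent_dense
    0.2, 100, 3000, 500, 15000, 0.0002, False,   # lambda_dssim .. random_background
)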
demo/mast3r_demo.py ADDED
@@ -0,0 +1,382 @@
+#!/usr/bin/env python3
+# Copyright (C) 2024-present Naver Corporation. All rights reserved.
+# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
+#
+# --------------------------------------------------------
+# sparse gradio demo functions
+# --------------------------------------------------------
+import sys
+
+import math
+import gradio
+import os
+import numpy as np
+import functools
+import trimesh
+import copy
+from scipy.spatial.transform import Rotation
+import tempfile
+import shutil
+
+from mast3r.cloud_opt.sparse_ga import sparse_global_alignment
+from mast3r.cloud_opt.tsdf_optimizer import TSDFPostProcess
+
+from mast3r.model import AsymmetricMASt3R
+from dust3r.image_pairs import make_pairs
+from dust3r.utils.image import load_images
+from dust3r.utils.device import to_numpy
+from dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
+from dust3r.demo import get_args_parser as dust3r_get_args_parser
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../wild-gaussian-splatting/gaussian-splatting'))
+from src.colmap_dataset_utils import (
+    inv,
+    init_filestructure,
+    save_images_masks,
+    save_cameras,
+    save_imagestxt,
+    save_pointcloud,
+    save_pointcloud_with_normals
+)
+
+import matplotlib.pyplot as pl
+
+import torch
+
+
+class SparseGAState:
+    def __init__(self, sparse_ga, cache_dir=None, outfile_name=None):
+        self.sparse_ga = sparse_ga
+        self.cache_dir = cache_dir
+        self.outfile_name = outfile_name
+
+    def __del__(self):
+        if self.cache_dir is not None and os.path.isdir(self.cache_dir):
+            shutil.rmtree(self.cache_dir)
+        self.cache_dir = None
+        if self.outfile_name is not None and os.path.isfile(self.outfile_name):
+            os.remove(self.outfile_name)
+        self.outfile_name = None
+
+
+def get_args_parser():
+    parser = dust3r_get_args_parser()
+    parser.add_argument('--share', action='store_true')
+    parser.add_argument('--gradio_delete_cache', default=None, type=int,
+                        help='age/frequency at which gradio removes the file. If >0, matching cache is purged')
+
+    actions = parser._actions
+    for action in actions:
+        if action.dest == 'model_name':
+            action.choices = ["MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric"]
+    # change defaults
+    parser.prog = 'mast3r demo'
+    return parser
+
+
+def _convert_scene_output_to_glb(outfile, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
+                                 cam_color=None, as_pointcloud=False,
+                                 transparent_cams=False, silent=False):
+    assert len(pts3d) == len(mask) <= len(imgs) <= len(cams2world) == len(focals)
+    pts3d = to_numpy(pts3d)
+    imgs = to_numpy(imgs)
+    focals = to_numpy(focals)
+    cams2world = to_numpy(cams2world)
+
+    scene = trimesh.Scene()
+
+    # full pointcloud
+    if as_pointcloud:
+        pts = np.concatenate([p[m.ravel()] for p, m in zip(pts3d, mask)]).reshape(-1, 3)
+        col = np.concatenate([p[m] for p, m in zip(imgs, mask)]).reshape(-1, 3)
+        valid_msk = np.isfinite(pts.sum(axis=1))
+        pct = trimesh.PointCloud(pts[valid_msk], colors=col[valid_msk])
+        scene.add_geometry(pct)
+    else:
+        meshes = []
+        for i in range(len(imgs)):
+            pts3d_i = pts3d[i].reshape(imgs[i].shape)
+            msk_i = mask[i] & np.isfinite(pts3d_i.sum(axis=-1))
+            meshes.append(pts3d_to_trimesh(imgs[i], pts3d_i, msk_i))
+        mesh = trimesh.Trimesh(**cat_meshes(meshes))
+        scene.add_geometry(mesh)
+
+    # add each camera
+    for i, pose_c2w in enumerate(cams2world):
+        if isinstance(cam_color, list):
+            camera_edge_color = cam_color[i]
+        else:
+            camera_edge_color = cam_color or CAM_COLORS[i % len(CAM_COLORS)]
+        add_scene_cam(scene, pose_c2w, camera_edge_color,
+                      None if transparent_cams else imgs[i], focals[i],
+                      imsize=imgs[i].shape[1::-1], screen_width=cam_size)
+
+    rot = np.eye(4)
+    rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix()
+    scene.apply_transform(np.linalg.inv(cams2world[0] @ OPENGL @ rot))
+    if not silent:
+        print('(exporting 3D scene to', outfile, ')')
+    scene.export(file_obj=outfile)
+    return outfile
+
+
+def get_3D_model_from_scene(silent, scene_state, min_conf_thr=2, as_pointcloud=False, mask_sky=False,
+                            clean_depth=False, transparent_cams=False, cam_size=0.05, TSDF_thresh=0):
+    """
+    extract 3D_model (glb file) from a reconstructed scene
+    """
+    if scene_state is None:
+        return None
+    outfile = scene_state.outfile_name
+    if outfile is None:
+        return None
+
+    # get optimized values from scene
+    scene = scene_state.sparse_ga
+    rgbimg = scene.imgs
+    focals = scene.get_focals().cpu()
+    cams2world = scene.get_im_poses().cpu()
+
+    # 3D pointcloud from depthmap, poses and intrinsics
+    if TSDF_thresh > 0:
+        tsdf = TSDFPostProcess(scene, TSDF_thresh=TSDF_thresh)
+        pts3d, _, confs = to_numpy(tsdf.get_dense_pts3d(clean_depth=clean_depth))
+    else:
+        pts3d, _, confs = to_numpy(scene.get_dense_pts3d(clean_depth=clean_depth))
+
+    torch.save(confs, '/app/data/confs.pt')
+    msk = to_numpy([c > min_conf_thr for c in confs])
+    return _convert_scene_output_to_glb(outfile, rgbimg, pts3d, msk, focals, cams2world, as_pointcloud=as_pointcloud,
+                                        transparent_cams=transparent_cams, cam_size=cam_size, silent=silent)
+
+
+def save_colmap_scene(scene, save_dir, min_conf_thr=2, clean_depth=False):
+    cam2world = scene.get_im_poses().detach().cpu().numpy()
+    world2cam = inv(cam2world)
+    principal_points = scene.get_principal_points().detach().cpu().numpy()
+    focals = scene.get_focals().detach().cpu().numpy()[..., None]
+    imgs = np.array(scene.imgs)
+
+    pts3d, _, confs = scene.get_dense_pts3d(clean_depth=clean_depth)
+    pts3d = [i.detach().reshape(imgs[0].shape) for i in pts3d]
+
+    masks = to_numpy([c > min_conf_thr for c in to_numpy(confs)])
+
+    mask_images = True
+
+    save_path, images_path, masks_path, sparse_path = init_filestructure(save_dir)
+    save_images_masks(imgs, masks, images_path, masks_path, mask_images)
+    save_cameras(focals, principal_points, sparse_path, imgs_shape=imgs.shape)
+    save_imagestxt(world2cam, sparse_path)
+    save_pointcloud_with_normals(imgs, pts3d, masks, sparse_path)
+    return save_path
+
+
+def get_reconstructed_scene(outdir, model, device, silent, image_size, current_scene_state,
+                            filelist, optim_level, lr1, niter1, lr2, niter2, min_conf_thr, matching_conf_thr,
+                            as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, scenegraph_type, winsize,
+                            win_cyclic, refid, TSDF_thresh, shared_intrinsics, **kw):
+    """
+    from a list of images, run mast3r inference, sparse global aligner.
+    then run get_3D_model_from_scene
+    """
+    imgs = load_images(filelist, size=image_size, verbose=not silent)
+    if len(imgs) == 1:
+        imgs = [imgs[0], copy.deepcopy(imgs[0])]
+        imgs[1]['idx'] = 1
+        filelist = [filelist[0], filelist[0] + '_2']
+
+    scene_graph_params = [scenegraph_type]
+    if scenegraph_type in ["swin", "logwin"]:
+        scene_graph_params.append(str(winsize))
+    elif scenegraph_type == "oneref":
+        scene_graph_params.append(str(refid))
+    if scenegraph_type in ["swin", "logwin"] and not win_cyclic:
+        scene_graph_params.append('noncyclic')
+    scene_graph = '-'.join(scene_graph_params)
+    pairs = make_pairs(imgs, scene_graph=scene_graph, prefilter=None, symmetrize=True)
+    if optim_level == 'coarse':
+        niter2 = 0
+
+    base_cache_dir = os.path.join(outdir, 'cache')
+    os.makedirs(base_cache_dir, exist_ok=True)
+
+    def get_next_dir(base_dir):
+        run_counter = 0
+        while True:
+            run_cache_dir = os.path.join(base_dir, f"run_{run_counter}")
+            if not os.path.exists(run_cache_dir):
+                os.makedirs(run_cache_dir)
+                break
+            run_counter += 1
+        return run_cache_dir
+
+    cache_dir = get_next_dir(base_cache_dir)
+    scene = sparse_global_alignment(filelist, pairs, cache_dir,
+                                    model, lr1=lr1, niter1=niter1, lr2=lr2, niter2=niter2, device=device,
+                                    opt_depth='depth' in optim_level, shared_intrinsics=shared_intrinsics,
+                                    matching_conf_thr=matching_conf_thr, **kw)
+
+    base_colmapdata_dir = os.path.join(outdir, 'colmap_data')
+    os.makedirs(base_colmapdata_dir, exist_ok=True)
+    colmap_data_dir = get_next_dir(base_colmapdata_dir)
+    save_colmap_scene(scene, colmap_data_dir, min_conf_thr, clean_depth)
+
+    if current_scene_state is not None and \
+            current_scene_state.outfile_name is not None:
+        outfile_name = current_scene_state.outfile_name
+    else:
+        outfile_name = tempfile.mktemp(suffix='_scene.glb', dir=outdir)
+
+    scene_state = SparseGAState(scene, cache_dir, outfile_name)
+    outfile = get_3D_model_from_scene(silent, scene_state, min_conf_thr, as_pointcloud, mask_sky,
+                                      clean_depth, transparent_cams, cam_size, TSDF_thresh)
+    print(f"colmap_data_dir: {colmap_data_dir}")
+    print(f"outfile_name: {outfile_name}")
+    print(f"cache_dir: {cache_dir}")
+    return scene_state, outfile
+
+
+def set_scenegraph_options(inputfiles, win_cyclic, refid, scenegraph_type):
+    num_files = len(inputfiles) if inputfiles is not None else 1
+    show_win_controls = scenegraph_type in ["swin", "logwin"]
+    show_winsize = scenegraph_type in ["swin", "logwin"]
+    show_cyclic = scenegraph_type in ["swin", "logwin"]
+    max_winsize, min_winsize = 1, 1
+    if scenegraph_type == "swin":
+        if win_cyclic:
+            max_winsize = max(1, math.ceil((num_files - 1) / 2))
+        else:
+            max_winsize = num_files - 1
+    elif scenegraph_type == "logwin":
+        if win_cyclic:
+            half_size = math.ceil((num_files - 1) / 2)
+            max_winsize = max(1, math.ceil(math.log(half_size, 2)))
+        else:
+            max_winsize = max(1, math.ceil(math.log(num_files, 2)))
+    winsize = gradio.Slider(label="Scene Graph: Window Size", value=max_winsize,
+                            minimum=min_winsize, maximum=max_winsize, step=1, visible=show_winsize)
+    win_cyclic = gradio.Checkbox(value=win_cyclic, label="Cyclic sequence", visible=show_cyclic)
+    win_col = gradio.Column(visible=show_win_controls)
+    refid = gradio.Slider(label="Scene Graph: Id", value=0, minimum=0,
+                          maximum=num_files - 1, step=1, visible=scenegraph_type == 'oneref')
+    return win_col, winsize, win_cyclic, refid
+
+
+def mast3r_demo_tab(cache_path, weights_path, device, silent=False):
+    model = AsymmetricMASt3R.from_pretrained(weights_path).to(device)
+
+    if not silent:
+        print('Outputting stuff in', cache_path)
+
+    recon_fun = functools.partial(get_reconstructed_scene, cache_path, model, device, silent)
+    model_from_scene_fun = functools.partial(get_3D_model_from_scene, silent)
+
+    def get_context():
+        css = """.gradio-container {margin: 0 !important; min-width: 100%};"""
+        title = "MASt3R Demo"
+        return gradio.Blocks(css=css, title=title, delete_cache=(True, True))
+
+    with get_context() as demo:
+        # scene state is saved so that you can change conf_thr, cam_size... without rerunning the inference
+        scene = gradio.State(None)
+        gradio.HTML('<h2 style="text-align: center;">MASt3R Demo</h2>')
+        with gradio.Column():
+            inputfiles = gradio.File(file_count="multiple")
+            with gradio.Row():
+                with gradio.Column():
+                    with gradio.Row():
+                        lr1 = gradio.Slider(label="Coarse LR", value=0.07, minimum=0.01, maximum=0.2, step=0.01)
+                        niter1 = gradio.Number(value=500, precision=0, minimum=0, maximum=10_000,
+                                               label="num_iterations", info="For coarse alignment!")
+                        lr2 = gradio.Slider(label="Fine LR", value=0.014, minimum=0.005, maximum=0.05, step=0.001)
+                        niter2 = gradio.Number(value=200, precision=0, minimum=0, maximum=100_000,
+                                               label="num_iterations", info="For refinement!")
+                        optim_level = gradio.Dropdown(["coarse", "refine", "refine+depth"],
+                                                      value='refine+depth', label="OptLevel",
+                                                      info="Optimization level")
+                    image_size = gradio.Dropdown(choices=[512, 224], label="Image Size", value=512)
+                    with gradio.Row():
+                        matching_conf_thr = gradio.Slider(label="Matching Confidence Thr", value=5.,
+                                                          minimum=0., maximum=30., step=0.1,
+                                                          info="Before Fallback to Regr3D!")
+                        shared_intrinsics = gradio.Checkbox(value=False, label="Shared intrinsics",
                                                            info="Only optimize one set of intrinsics for all views")
+                        scenegraph_type = gradio.Dropdown([("complete: all possible image pairs", "complete"),
+                                                           ("swin: sliding window", "swin"),
+                                                           ("logwin: sliding window with long range", "logwin"),
+                                                           ("oneref: match one image with all", "oneref")],
+                                                          value='complete', label="Scenegraph",
+                                                          info="Define how to make pairs",
+                                                          interactive=True)
+                        with gradio.Column(visible=False) as win_col:
+                            winsize = gradio.Slider(label="Scene Graph: Window Size", value=1,
+                                                    minimum=1, maximum=1, step=1)
+                            win_cyclic = gradio.Checkbox(value=False, label="Cyclic sequence")
+                        refid = gradio.Slider(label="Scene Graph: Id", value=0,
+                                              minimum=0, maximum=0, step=1, visible=False)
+            run_btn = gradio.Button("Run")
+
+            with gradio.Row():
+                # adjust the confidence threshold
+                min_conf_thr = gradio.Slider(label="min_conf_thr", value=1.5, minimum=0.0, maximum=10, step=0.1)
+                # adjust the camera size in the output pointcloud
+                cam_size = gradio.Slider(label="cam_size", value=0.2, minimum=0.001, maximum=1.0, step=0.001)
+                TSDF_thresh = gradio.Slider(label="TSDF Threshold", value=0., minimum=0., maximum=1., step=0.01)
+            with gradio.Row():
+                as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
+                # two post process implemented
+                mask_sky = gradio.Checkbox(value=False, label="Mask sky")
+                clean_depth = gradio.Checkbox(value=True, label="Clean-up depthmaps")
+                transparent_cams = gradio.Checkbox(value=False, label="Transparent cameras")
+
+            outmodel = gradio.Model3D()
+
+            # events
+            scenegraph_type.change(set_scenegraph_options,
+                                   inputs=[inputfiles, win_cyclic, refid, scenegraph_type],
+                                   outputs=[win_col, winsize, win_cyclic, refid])
+            inputfiles.change(set_scenegraph_options,
+                              inputs=[inputfiles, win_cyclic, refid, scenegraph_type],
+                              outputs=[win_col, winsize, win_cyclic, refid])
+            win_cyclic.change(set_scenegraph_options,
+                              inputs=[inputfiles, win_cyclic, refid, scenegraph_type],
+                              outputs=[win_col, winsize, win_cyclic, refid])
+            run_btn.click(fn=recon_fun,
+                          inputs=[image_size, scene, inputfiles, optim_level, lr1, niter1, lr2, niter2, min_conf_thr, matching_conf_thr,
+                                  as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
+                                  scenegraph_type, winsize, win_cyclic, refid, TSDF_thresh, shared_intrinsics],
+                          outputs=[scene, outmodel])
+            min_conf_thr.release(fn=model_from_scene_fun,
+                                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                         clean_depth, transparent_cams, cam_size, TSDF_thresh],
+                                 outputs=outmodel)
+            cam_size.change(fn=model_from_scene_fun,
+                            inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                    clean_depth, transparent_cams, cam_size, TSDF_thresh],
+                            outputs=outmodel)
+            TSDF_thresh.change(fn=model_from_scene_fun,
+                               inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                       clean_depth, transparent_cams, cam_size, TSDF_thresh],
+                               outputs=outmodel)
+            as_pointcloud.change(fn=model_from_scene_fun,
+                                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                         clean_depth, transparent_cams, cam_size, TSDF_thresh],
+                                 outputs=outmodel)
+            mask_sky.change(fn=model_from_scene_fun,
+                            inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                    clean_depth, transparent_cams, cam_size, TSDF_thresh],
+                            outputs=outmodel)
+            clean_depth.change(fn=model_from_scene_fun,
+                               inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                       clean_depth, transparent_cams, cam_size, TSDF_thresh],
+                               outputs=outmodel)
+            transparent_cams.change(model_from_scene_fun,
+                                    inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                            clean_depth, transparent_cams, cam_size, TSDF_thresh],
+                                    outputs=outmodel)
+
+    return demo
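The pairing strategy handed to make_pairs is encoded as a dash-joined string assembled inside get_reconstructed_scene. A small sketch that mirrors that assembly, with the expected outputs shown as comments:

def build_scene_graph(scenegraph_type, winsize=1, refid=0, win_cyclic=False):
    # Mirrors the scene_graph_params logic in get_reconstructed_scene()
    params = [scenegraph_type]
    if scenegraph_type in ["swin", "logwin"]:
        params.append(str(winsize))
    elif scenegraph_type == "oneref":
        params.append(str(refid))
    if scenegraph_type in ["swin", "logwin"] and not win_cyclic:
        params.append('noncyclic')
    return '-'.join(params)

print(build_scene_graph("complete"))           # complete
print(build_scene_graph("swin", winsize=3))    # swin-3-noncyclic
print(build_scene_graph("oneref", refid=2))    # oneref-2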
requirements.txt ADDED
@@ -0,0 +1 @@
+-e wild-gaussian-splatting
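The editable install (-e) makes pip build the submodule in place, which assumes wild-gaussian-splatting ships a setup.py or pyproject.toml at its root; Hugging Face Spaces run this requirements file at build time. A sketch of triggering the same install from Python, should that ever be needed:

import subprocess
import sys

# Same effect as `pip install -e wild-gaussian-splatting` run from the repo root
subprocess.run([sys.executable, "-m", "pip", "install", "-e", "wild-gaussian-splatting"], check=True)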
wild-gaussian-splatting ADDED
@@ -0,0 +1 @@
+Subproject commit fe8a9f389cdc583864f34a9e3ae32899c674229a