JianyuanWang committed
Commit 8b29fee · 1 parent: e038053
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 examples/** filter=lfs diff=lfs merge=lfs -text
+examples/videos/room_video.mp4 filter=lfs diff=lfs merge=lfs -text
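The new rule routes the added example video through Git LFS, alongside the existing `*.zst`, `*tfevents*`, and `examples/**` rules. As a rough, hypothetical illustration of how such glob rules select paths (Python's fnmatch only approximates real gitattributes matching, which treats `/` and `**` specially):

```python
# Hypothetical sketch, not part of the commit: approximate which paths the
# .gitattributes globs would route through Git LFS. fnmatch ignores git's
# special handling of '/' and '**', so this is an approximation only.
from fnmatch import fnmatch

lfs_patterns = ["*.zst", "*tfevents*", "examples/**", "examples/videos/room_video.mp4"]

def looks_lfs_tracked(path: str) -> bool:
    return any(fnmatch(path, pattern) for pattern in lfs_patterns)

print(looks_lfs_tracked("examples/videos/room_video.mp4"))  # True
print(looks_lfs_tracked("app.py"))                          # False
```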
app.py CHANGED
@@ -22,20 +22,7 @@ import spaces
 
 
 
-
-# def get_free_port():
-#     """Get a free port using socket."""
-#     # return 80
-#     # return 8080
-#     # return 10088 # for debugging
-#     # return 7860
-#     # return 7888
-#     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-#         s.bind(('', 0))
-#         port = s.getsockname()[1]
-#         return port
-
-
+print("Loading model")
 
 cfg_file = "config/base.yaml"
 cfg = OmegaConf.load(cfg_file)
@@ -50,16 +37,18 @@ if "vggt_model" in pretrain_model:
 else:
     vggt_model.load_state_dict(pretrain_model, strict=True)
 
+print("Model loaded")
 
 # @torch.inference_mode()
 
-@spaces.GPU(duration=240)
+@spaces.GPU(duration=120)
 def vggt_demo(
     input_video,
     input_image,
     conf_thres=3.0,
     frame_filter="all",
     mask_black_bg=False,
+    show_cam=True
 ):
     start_time = time.time()
     gc.collect()
@@ -133,10 +122,10 @@ def vggt_demo(
     np.savez(prediction_save_path, **predictions)
 
 
-    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}.glb"
+    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
 
 
-    glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg)
+    glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam)
     glbscene.export(file_obj=glbfile)
 
     del predictions
@@ -155,9 +144,19 @@ def vggt_demo(
     log = "Success. Waiting for visualization."
     return glbfile, log, target_dir, gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True)
 
+def clear_fields():
+    """
+    Return None for reconstruction_output and target_dir_output
+    (and optionally reset frame_filter to "All" or something else if needed).
+    """
+    return None, None
+
 
 
-def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg):
+def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam):
+    # Return early if target_dir is None or "None"
+    if target_dir is None or target_dir == "None":
+        return None, "No reconstruction available. Please run 'Reconstruct' first.", None
 
     loaded = np.load(f"{target_dir}/predictions.npz", allow_pickle=True)
     # predictions = np.load(f"{target_dir}/predictions.npz", allow_pickle=True)
@@ -165,10 +164,10 @@ def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg):
     # for key in predictions.files: print(key)
     predictions = {key: loaded[key] for key in loaded.keys()}
 
-    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}.glb"
+    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
 
     if not os.path.exists(glbfile):
-        glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg)
+        glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam)
         glbscene.export(file_obj=glbfile)
     return glbfile, "Updating Visualization", target_dir
 
@@ -198,6 +197,7 @@ drums_video = "examples/videos/drums_video.mp4"
 
 kitchen_video = "examples/videos/kitchen_video.mp4"
 
+room_video = "examples/videos/room_video.mp4"
 ###########################################################################################
 apple_images = glob.glob(f'examples/apple/images/*')
 bonsai_images = glob.glob(f'examples/bonsai/images/*')
@@ -216,8 +216,7 @@ statue_images = glob.glob(f'examples/statue/images/*')
 
 drums_images = glob.glob(f'examples/drums/images/*')
 kitchen_images = glob.glob(f'examples/kitchen/images/*')
-
-
+room_images = glob.glob(f'examples/room/images/*')
 
 ###########################################################################################
 
@@ -256,58 +255,88 @@ with gr.Blocks() as demo:
     with gr.Row():
         conf_thres = gr.Slider(minimum=0.1, maximum=10.0, value=2.0, step=0.1, label="Conf Thres")
         frame_filter = gr.Dropdown(choices=["All"], value="All", label="Show Points from Frame")
-        mask_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
+        with gr.Column():
+            show_cam = gr.Checkbox(label="Show Camera", value=True)
+            mask_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
 
     log_output = gr.Textbox(label="Log")
-    # Add a hidden textbox for target_dir
-    target_dir_output = gr.Textbox(label="Target Dir", visible=False)
+    # Add a hidden textbox for target_dir with default value "None"
+    target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
+
+
 
     with gr.Row():
         submit_btn = gr.Button("Reconstruct", scale=1)
-        revisual_btn = gr.Button("Update Visualization", scale=1)
+        # revisual_btn = gr.Button("Update Visualization", scale=1)
         clear_btn = gr.ClearButton([input_video, input_images, reconstruction_output, log_output, target_dir_output], scale=1) #Modified reconstruction_output
 
 
 
 
     examples = [
-        [counter_video, counter_images, 1.5, "All", False],
-        [flower_video, flower_images, 1.5, "All", False],
-        [kitchen_video, kitchen_images, 3, "All", False],
-        [fern_video, fern_images, 1.5, "All", False],
-        # [person_video, person_images],
-        # [statue_video, statue_images],
-        # [drums_video, drums_images],
-        # [horns_video, horns_images, 1.5, "All", False],
-        # [apple_video, apple_images],
-        # [bonsai_video, bonsai_images],
+        [room_video, room_images, 1.0, "All", False, True],
+        [counter_video, counter_images, 1.5, "All", False, True],
+        [flower_video, flower_images, 1.5, "All", False, True],
+        [kitchen_video, kitchen_images, 3, "All", False, True],
+        [fern_video, fern_images, 1.5, "All", False, True],
     ]
 
     gr.Examples(examples=examples,
-        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg],
+        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam],
         outputs=[reconstruction_output, log_output, target_dir_output, frame_filter], # Added frame_filter
         fn=vggt_demo, # Use our wrapper function
-        cache_examples=False,
+        cache_examples=True,
         examples_per_page=50,
     )
 
+
     submit_btn.click(
-        vggt_demo, # Use the same wrapper function
-        [input_video, input_images, conf_thres, frame_filter, mask_black_bg],
-        [reconstruction_output, log_output, target_dir_output, frame_filter], # Added frame_filter to outputs
-        # concurrency_limit=1
+        fn=clear_fields,
+        inputs=[],
+        outputs=[reconstruction_output, target_dir_output]
+    ).then(
+        fn=vggt_demo,
+        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam],
+        outputs=[reconstruction_output, log_output, target_dir_output, frame_filter]
    )
 
-    revisual_btn.click(
+
+    # submit_btn.click(
+    #     vggt_demo, # Use the same wrapper function
+    #     [input_video, input_images, conf_thres, frame_filter, mask_black_bg],
+    #     [reconstruction_output, log_output, target_dir_output, frame_filter], # Added frame_filter to outputs
+    #     # concurrency_limit=1
+    # )
+
+    # Add event handlers for automatic updates when parameters change
+    conf_thres.change(
         update_visualization,
-        [target_dir_output, conf_thres, frame_filter, mask_black_bg],
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
         [reconstruction_output, log_output, target_dir_output],
     )
-
-# demo.launch(debug=True, share=True)
-# demo.launch(server_name="0.0.0.0", server_port=8082, debug=True, share=False)
-# demo.queue(max_size=20).launch(show_error=True, share=True)
-demo.queue(max_size=20).launch(show_error=True) #, share=True, server_port=7888, server_name="0.0.0.0")
+
+    frame_filter.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
+    )
+
+    mask_black_bg.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
+    )
+
+    show_cam.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
    )
+
+demo.queue(max_size=20).launch(show_error=True, share=True) #, share=True, server_port=7888, server_name="0.0.0.0")
+
+
 # share=True
 # demo.queue(max_size=20, concurrency_count=1).launch(debug=True, share=True)
 ########################################################################################################################
+
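Two patterns in this revision are worth noting: `submit_btn.click(...).then(...)` chains a cheap UI-reset step ahead of the GPU-bound reconstruction, and every visualization parameter is baked into the exported GLB filename so that `update_visualization` can reuse a previously written file instead of rebuilding the scene. A minimal standalone sketch of that filename-as-cache-key idea (names here are illustrative, not the app's API):

```python
# Minimal sketch (illustrative names, not the app's API) of the caching pattern
# used above: encode every visualization parameter in the exported filename, so
# toggling a control back to a previous combination reuses the file on disk.
import os

def glb_cache_path(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam):
    # frame_filter may contain '.' (e.g. an image filename), which would confuse
    # the file extension, hence the replace -- mirroring the diff above
    safe_filter = frame_filter.replace(".", "_")
    return os.path.join(
        target_dir,
        f"glbscene_{conf_thres}_{safe_filter}_mask{mask_black_bg}_cam{show_cam}.glb",
    )

def get_or_export(target_dir, params, export_fn):
    path = glb_cache_path(target_dir, *params)
    if not os.path.exists(path):  # cache miss: build and export once
        export_fn(path)
    return path
```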
examples/room/images/IMG_1507.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: d7be333a2147ae3d118fefcb343bc29cac112272038c4edd354040210c61e297
  • Pointer size: 131 Bytes
  • Size of remote file: 231 kB
examples/room/images/IMG_1508.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: ec8d7b0e1ce14b62abc05586577bc930737273ab191d94b97a6075cbfbe78485
  • Pointer size: 131 Bytes
  • Size of remote file: 221 kB
examples/room/images/IMG_1509.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: 39f9b9c15bd38073112228c5ab50befbdcc6d5d77f4c3ebafbad0218d13122e3
  • Pointer size: 131 Bytes
  • Size of remote file: 242 kB
examples/room/images/IMG_1510.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: ef867329251646dd1a9a65e53d520abe1ae312f4b03020def00be1d82e079119
  • Pointer size: 131 Bytes
  • Size of remote file: 255 kB
examples/room/images/IMG_1511.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: e1f24a0d124f2a884a8f98a887f6d4c99ac8ed6251f25cd1e59a8ab287328d85
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
examples/room/images/IMG_1512.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: 127a830eb92e5a33ea4f01002ef12073e827789e47f4fb59910fa050793eba79
  • Pointer size: 131 Bytes
  • Size of remote file: 128 kB
examples/{room/images/IMG_1506.HEIC.JPG.JPG → videos/room_video.mp4} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd8dc207341579e75338323ad0bf3cd10a0f23bea8a60f1d4b49579f8c606fa0
-size 269173
+oid sha256:9d21d3682bb84bbeaa6a5b4d766998a083f9a1146fb2ef03761b7c7d98d83d42
+size 1179725
gradio_util.py CHANGED
@@ -16,7 +16,7 @@ from scipy.spatial.transform import Rotation
16
 
17
 
18
 
19
- def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all", mask_black_bg=False) -> trimesh.Scene:
20
  """
21
  Converts VGG SFM predictions to a 3D scene represented as a GLB.
22
 
@@ -67,18 +67,18 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
67
  colors_rgb = colors_rgb[conf_mask]
68
 
69
 
70
- # vertices_3d = predictions["points3D"].cpu().numpy()
71
- # colors_rgb = (predictions["points3D_rgb"].cpu().numpy() * 255).astype(
72
- # np.uint8
73
- # )
74
- # camera_matrices = predictions["extrinsics_opencv"].cpu().numpy()
75
-
76
- # Calculate the 5th and 95th percentiles along each axis
77
- lower_percentile = np.percentile(vertices_3d, 5, axis=0)
78
- upper_percentile = np.percentile(vertices_3d, 95, axis=0)
79
-
80
- # Calculate the diagonal length of the percentile bounding box
81
- scene_scale = np.linalg.norm(upper_percentile - lower_percentile)
82
 
83
  colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
84
 
@@ -98,16 +98,17 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
98
  extrinsics_matrices[:, :3, :4] = camera_matrices
99
  extrinsics_matrices[:, 3, 3] = 1
100
 
101
- # Add camera models to the scene
102
- for i in range(num_cameras):
103
- world_to_camera = extrinsics_matrices[i]
104
- camera_to_world = np.linalg.inv(world_to_camera)
105
- rgba_color = colormap(i / num_cameras)
106
- current_color = tuple(int(255 * x) for x in rgba_color[:3])
107
-
108
- integrate_camera_into_scene(
109
- scene_3d, camera_to_world, current_color, scene_scale
110
- )
 
111
 
112
  # Align scene to the observation of the first camera
113
  scene_3d = apply_scene_alignment(scene_3d, extrinsics_matrices)
 
16
 
17
 
18
 
19
+ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all", mask_black_bg=False, show_cam=True) -> trimesh.Scene:
20
  """
21
  Converts VGG SFM predictions to a 3D scene represented as a GLB.
22
 
 
67
  colors_rgb = colors_rgb[conf_mask]
68
 
69
 
70
+
71
+ if vertices_3d is None or np.asarray(vertices_3d).size == 0:
72
+ vertices_3d = np.array([[1, 0, 0]])
73
+ colors_rgb = np.array([[255, 255, 255]])
74
+ scene_scale = 1
75
+ else:
76
+ # Calculate the 5th and 95th percentiles along each axis
77
+ lower_percentile = np.percentile(vertices_3d, 5, axis=0)
78
+ upper_percentile = np.percentile(vertices_3d, 95, axis=0)
79
+
80
+ # Calculate the diagonal length of the percentile bounding box
81
+ scene_scale = np.linalg.norm(upper_percentile - lower_percentile)
82
 
83
  colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
84
 
 
98
  extrinsics_matrices[:, :3, :4] = camera_matrices
99
  extrinsics_matrices[:, 3, 3] = 1
100
 
101
+ if show_cam:
102
+ # Add camera models to the scene
103
+ for i in range(num_cameras):
104
+ world_to_camera = extrinsics_matrices[i]
105
+ camera_to_world = np.linalg.inv(world_to_camera)
106
+ rgba_color = colormap(i / num_cameras)
107
+ current_color = tuple(int(255 * x) for x in rgba_color[:3])
108
+
109
+ integrate_camera_into_scene(
110
+ scene_3d, camera_to_world, current_color, scene_scale
111
+ )
112
 
113
  # Align scene to the observation of the first camera
114
  scene_3d = apply_scene_alignment(scene_3d, extrinsics_matrices)
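The reworked block guards against an empty point cloud and derives the scene scale from a percentile bounding box rather than the raw extent, so a handful of outlier points cannot blow up the rendered camera-mesh size. Restated as a standalone numpy sketch (the function name is mine, not the repo's):

```python
# Standalone restatement of the scale logic above (function name is illustrative).
# Using the 5th-95th percentile box diagonal instead of the full min/max extent
# keeps the scale robust to stray points far from the scene.
import numpy as np

def robust_scene_scale(vertices_3d):
    if vertices_3d is None or np.asarray(vertices_3d).size == 0:
        # Degenerate cloud: fall back to a single dummy vertex and unit scale,
        # mirroring the fallback in the diff.
        return np.array([[1.0, 0.0, 0.0]]), 1.0
    lower = np.percentile(vertices_3d, 5, axis=0)   # per-axis 5th percentile
    upper = np.percentile(vertices_3d, 95, axis=0)  # per-axis 95th percentile
    return np.asarray(vertices_3d), float(np.linalg.norm(upper - lower))

# Example: 1000 inlier points plus one far outlier barely moves the scale.
pts = np.vstack([np.random.randn(1000, 3), [[1e6, 1e6, 1e6]]])
_, scale = robust_scene_scale(pts)
print(f"scene scale ~ {scale:.2f}")
```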