JianyuanWang committed
Commit ae06e9d
Parent: 7998a5d

upload examples
Files changed (35)
  1. .gitattributes +2 -0
  2. app.py +35 -18
  3. clean_app.py +0 -229
  4. examples/british_museum/images/336.jpg +0 -3
  5. examples/british_museum/images/515.jpg +0 -3
  6. examples/british_museum/images/599.jpg +0 -3
  7. examples/british_museum/images/632.jpg +0 -3
  8. examples/british_museum/images/767.jpg +0 -3
  9. examples/british_museum/images/886.jpg +0 -3
  10. examples/cake_single/images/frame000020.jpg +0 -3
  11. examples/llff_horns_single/images/017.png +0 -3
  12. examples/room/images/{4.png → no_overlap_1.png} +0 -0
  13. examples/room/images/{IMG_1506.HEIC.JPG → no_overlap_2.HEIC.JPG} +0 -0
  14. examples/room/images/{IMG_1507.HEIC.JPG → no_overlap_3.HEIC.JPG} +0 -0
  15. examples/room/images/{IMG_1508.HEIC.JPG → no_overlap_4.HEIC.JPG} +0 -0
  16. examples/room/images/{IMG_1509.HEIC.JPG → no_overlap_5.HEIC.JPG} +0 -0
  17. examples/room/images/{IMG_1510.HEIC.JPG → no_overlap_6.HEIC.JPG} +0 -0
  18. examples/room/images/{IMG_1511.HEIC.JPG → no_overlap_7.HEIC.JPG} +0 -0
  19. examples/room/images/{IMG_1512.HEIC.JPG → no_overlap_8.HEIC.JPG} +0 -0
  20. examples/{british_museum/images/210.jpg → single_cartoon/images/model_was_never_trained_on_single_image_or_cartoon.jpg} +2 -2
  21. examples/{british_museum/images/069.jpg → single_oil_painting/images/model_was_never_trained_on_single_image_or_oil_painting.png} +2 -2
  22. examples/statue/images/000.jpg +0 -3
  23. examples/statue/images/002.jpg +0 -3
  24. examples/statue/images/004.jpg +0 -3
  25. examples/statue/images/006.jpg +0 -3
  26. examples/statue/images/008.jpg +0 -3
  27. examples/statue/images/010.jpg +0 -3
  28. examples/statue/images/012.jpg +0 -3
  29. examples/statue/images/014.jpg +0 -3
  30. examples/statue/images/016.jpg +0 -3
  31. examples/{british_museum/images/134.jpg → videos/single_cartoon.mp4} +2 -2
  32. examples/{british_museum/images/192.jpg → videos/single_oil_painting.mp4} +2 -2
  33. gradio_util.py +102 -9
  34. requirements.txt +1 -2
  35. skyseg.onnx +3 -0
.gitattributes CHANGED
@@ -36,3 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 examples/** filter=lfs diff=lfs merge=lfs -text
 examples/videos/room_video.mp4 filter=lfs diff=lfs merge=lfs -text
 examples/room/images/IMG_1506.HEIC.JPG filter=lfs diff=lfs merge=lfs -text
+skyseg.onnx filter=lfs diff=lfs merge=lfs -text
+examples/ filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -48,7 +48,8 @@ def vggt_demo(
     conf_thres=3.0,
     frame_filter="all",
     mask_black_bg=False,
-    show_cam=True
+    show_cam=True,
+    mask_sky=False
 ):
     start_time = time.time()
     gc.collect()
@@ -122,10 +123,10 @@ def vggt_demo(
     np.savez(prediction_save_path, **predictions)


-    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
+    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}_sky{mask_sky}.glb"


-    glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam)
+    glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam, mask_sky=mask_sky, target_dir=target_dir)
     glbscene.export(file_obj=glbfile)

     del predictions
@@ -162,7 +163,7 @@ def update_log():


-def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam):
+def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky):
     # Return early if target_dir is None, "None", empty string, or otherwise invalid
     # Check if the predictions file exists
     predictions_path = f"{target_dir}/predictions.npz"
@@ -179,10 +180,10 @@ def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam):
     # for key in predictions.files: print(key)
     predictions = {key: loaded[key] for key in loaded.keys()}

-    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
+    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}_sky{mask_sky}.glb"

     if not os.path.exists(glbfile):
-        glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam)
+        glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam, mask_sky=mask_sky, target_dir=target_dir)
         glbscene.export(file_obj=glbfile)
     return glbfile, "Updating Visualization", target_dir
@@ -214,6 +215,11 @@ drums_video = "examples/videos/drums_video.mp4"
 kitchen_video = "examples/videos/kitchen_video.mp4"

 room_video = "examples/videos/room_video.mp4"
+
+# Add the new video examples
+single_cartoon_video = "examples/videos/single_cartoon.mp4"
+single_oil_painting_video = "examples/videos/single_oil_painting.mp4"
+
 ###########################################################################################
 apple_images = glob.glob(f'examples/apple/images/*')
 bonsai_images = glob.glob(f'examples/bonsai/images/*')
@@ -233,7 +239,8 @@ statue_images = glob.glob(f'examples/statue/images/*')
 drums_images = glob.glob(f'examples/drums/images/*')
 kitchen_images = glob.glob(f'examples/kitchen/images/*')
 room_images = glob.glob(f'examples/room/images/*')
-
+single_cartoon_images = glob.glob(f'examples/single_cartoon/images/*')
+single_oil_painting_images = glob.glob(f'examples/single_oil_painting/images/*')
 ###########################################################################################


@@ -281,6 +288,7 @@ with gr.Blocks(css="""
         with gr.Column():
             show_cam = gr.Checkbox(label="Show Camera", value=True)
             mask_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
+            mask_sky = gr.Checkbox(label="Filter Sky", value=False)

     # Add a hidden textbox for target_dir with default value "None"
     target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
@@ -296,15 +304,18 @@ with gr.Blocks(css="""


     examples = [
-        [room_video, room_images, 1.1, "All", False, True],
-        [counter_video, counter_images, 1.5, "All", False, True],
-        [flower_video, flower_images, 1.5, "All", False, True],
-        [kitchen_video, kitchen_images, 3, "All", False, True],
-        [fern_video, fern_images, 1.5, "All", False, True],
+        [single_cartoon_video, single_cartoon_images, 0.5, "All", False, True, False],
+        [single_oil_painting_video, single_oil_painting_images, 0.5, "All", False, True, True],
+        [room_video, room_images, 1.1, "All", False, True, False],
+        [counter_video, counter_images, 1.5, "All", False, True, False],
+        [flower_video, flower_images, 1.5, "All", False, True, False],
+        [kitchen_video, kitchen_images, 3, "All", False, True, False],
+        [fern_video, fern_images, 1.5, "All", False, True, False],
+        # Add the new examples
     ]

     gr.Examples(examples=examples,
-                inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam],
+                inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky],
                 outputs=[reconstruction_output, log_output, target_dir_output, frame_filter],  # Added frame_filter
                 fn=vggt_demo,  # Use our wrapper function
                 cache_examples=False,
@@ -322,7 +333,7 @@ with gr.Blocks(css="""
         outputs=[log_output]
     ).then(
         fn=vggt_demo,
-        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam],
+        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky],
         outputs=[reconstruction_output, log_output, target_dir_output, frame_filter]
     )
@@ -330,25 +341,31 @@ with gr.Blocks(css="""

     # Add event handlers for automatic updates when parameters change
     conf_thres.change(
         update_visualization,
-        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky],
         [reconstruction_output, log_output, target_dir_output],
     )

     frame_filter.change(
         update_visualization,
-        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky],
         [reconstruction_output, log_output, target_dir_output],
     )

     mask_black_bg.change(
         update_visualization,
-        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky],
         [reconstruction_output, log_output, target_dir_output],
     )

     show_cam.change(
         update_visualization,
-        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky],
         [reconstruction_output, log_output, target_dir_output],
     )
+
+    mask_sky.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky],
+        [reconstruction_output, log_output, target_dir_output],
+    )
 
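A note on the caching pattern above: app.py bakes every visualization parameter, including the new mask_sky flag, into the exported GLB filename, so update_visualization can serve a previously exported file when a checkbox is toggled back and only rebuilds the scene on a cache miss. A minimal sketch of that cache-key idea (glb_cache_path is a hypothetical helper name, not part of this commit):

import os

def glb_cache_path(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam, mask_sky):
    """Build the parameter-keyed GLB filename, mirroring the f-string in app.py."""
    # Every parameter that changes the rendered scene must appear in the name;
    # leaving one out would serve a stale cached GLB after a toggle.
    safe_filter = frame_filter.replace('.', '_')  # keep "." out of the filename stem
    return os.path.join(
        target_dir,
        f"glbscene_{conf_thres}_{safe_filter}_mask{mask_black_bg}_cam{show_cam}_sky{mask_sky}.glb",
    )

# Two different sky settings map to two distinct cache entries:
print(glb_cache_path("out", 3.0, "all", False, True, False))
print(glb_cache_path("out", 3.0, "all", False, True, True))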
clean_app.py DELETED
@@ -1,229 +0,0 @@
-import os
-import cv2
-import torch
-import numpy as np
-import gradio as gr
-import sys
-import os
-import socket
-import webbrowser
-sys.path.append('vggt/')
-import shutil
-from datetime import datetime
-from demo_hf import demo_fn
-from omegaconf import DictConfig, OmegaConf
-import glob
-import gc
-import time
-from viser_fn import viser_wrapper
-
-
-def get_free_port():
-    """Get a free port using socket."""
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.bind(('', 0))
-        port = s.getsockname()[1]
-        return port
-
-def vggt_demo(
-    input_video,
-    input_image,
-):
-    start_time = time.time()
-    gc.collect()
-    torch.cuda.empty_cache()
-
-
-    debug = False
-
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    target_dir = f"input_images_{timestamp}"
-    if os.path.exists(target_dir):
-        shutil.rmtree(target_dir)
-
-    os.makedirs(target_dir)
-    target_dir_images = target_dir + "/images"
-    os.makedirs(target_dir_images)
-
-
-    if input_video is not None:
-        if not isinstance(input_video, str):
-            input_video = input_video["video"]["path"]
-
-    cfg_file = "config/base.yaml"
-    cfg = OmegaConf.load(cfg_file)
-
-    if input_image is not None:
-        input_image = sorted(input_image)
-        # recon_num = len(input_image)
-
-        # Copy files to the new directory
-        for file_name in input_image:
-            shutil.copy(file_name, target_dir_images)
-    elif input_video is not None:
-        vs = cv2.VideoCapture(input_video)
-
-        fps = vs.get(cv2.CAP_PROP_FPS)
-
-        frame_rate = 1
-        frame_interval = int(fps * frame_rate)
-
-        video_frame_num = 0
-        count = 0
-
-        while True:
-            (gotit, frame) = vs.read()
-            count += 1
-
-            if not gotit:
-                break
-
-            if count % frame_interval == 0:
-                cv2.imwrite(target_dir_images + "/" + f"{video_frame_num:06}.png", frame)
-                video_frame_num += 1
-    else:
-        return None, "Uploading not finished or Incorrect input format"
-
-
-    print(f"Files have been copied to {target_dir_images}")
-    cfg.SCENE_DIR = target_dir
-
-    predictions = demo_fn(cfg)
-
-    # Get a free port for viser
-    viser_port = get_free_port()
-
-    # Start viser visualization in a separate thread/process
-    viser_wrapper(predictions, port=viser_port)
-
-    del predictions
-    gc.collect()
-    torch.cuda.empty_cache()
-
-    print(input_image)
-    print(input_video)
-    end_time = time.time()
-    execution_time = end_time - start_time
-    print(f"Execution time: {execution_time} seconds")
-    return None, viser_port
-
-
-
-statue_video = "examples/videos/statue_video.mp4"
-
-apple_video = "examples/videos/apple_video.mp4"
-british_museum_video = "examples/videos/british_museum_video.mp4"
-cake_video = "examples/videos/cake_video.mp4"
-bonsai_video = "examples/videos/bonsai_video.mp4"
-face_video = "examples/videos/in2n_face_video.mp4"
-counter_video = "examples/videos/in2n_counter_video.mp4"
-
-horns_video = "examples/videos/llff_horns_video.mp4"
-person_video = "examples/videos/in2n_person_video.mp4"
-
-flower_video = "examples/videos/llff_flower_video.mp4"
-
-fern_video = "examples/videos/llff_fern_video.mp4"
-
-drums_video = "examples/videos/drums_video.mp4"
-
-kitchen_video = "examples/videos/kitchen_video.mp4"
-
-###########################################################################################
-apple_images = glob.glob(f'examples/apple/images/*')
-bonsai_images = glob.glob(f'examples/bonsai/images/*')
-cake_images = glob.glob(f'examples/cake/images/*')
-british_museum_images = glob.glob(f'examples/british_museum/images/*')
-face_images = glob.glob(f'examples/in2n_face/images/*')
-counter_images = glob.glob(f'examples/in2n_counter/images/*')
-
-horns_images = glob.glob(f'examples/llff_horns/images/*')
-
-person_images = glob.glob(f'examples/in2n_person/images/*')
-flower_images = glob.glob(f'examples/llff_flower/images/*')
-
-fern_images = glob.glob(f'examples/llff_fern/images/*')
-statue_images = glob.glob(f'examples/statue/images/*')
-
-drums_images = glob.glob(f'examples/drums/images/*')
-kitchen_images = glob.glob(f'examples/kitchen/images/*')
-
-
-
-###########################################################################################
-
-
-with gr.Blocks() as demo:
-
-    gr.Markdown("""
-    # 🏛️ VGGT: Visual Geometry Grounded Transformer
-
-    <div style="font-size: 16px; line-height: 1.2;">
-    Alpha version (testing).
-    </div>
-    """)
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_video = gr.Video(label="Upload Video", interactive=True)
-            input_images = gr.File(file_count="multiple", label="Upload Images", interactive=True)
-
-
-        with gr.Column(scale=3):
-            viser_output = gr.HTML(
-                label="Viser Visualization",
-                value='''<div style="height: 520px; border: 1px solid #e0e0e0;
-                border-radius: 4px; padding: 16px;
-                display: flex; align-items: center;
-                justify-content: center">
-                3D Reconstruction (Point Cloud and Camera Poses; Zoom in to see details)
-                </div>'''
-            )
-
-    log_output = gr.Textbox(label="Log")
-
-    with gr.Row():
-        submit_btn = gr.Button("Reconstruct", scale=1)
-        clear_btn = gr.ClearButton([input_video, input_images, viser_output, log_output], scale=1)  # Modified viser_output
-
-
-    examples = [
-        [flower_video, flower_images],
-        [kitchen_video, kitchen_images],
-        [counter_video, counter_images],
-        [fern_video, fern_images],
-        [horns_video, horns_images],
-    ]
-
-    def process_example(video, images):
-        """Wrapper function to ensure outputs are properly captured"""
-        model_output, log = vggt_demo(video, images)
-
-        viser_url = f"http://localhost:{log}"
-        print(f"Viser URL: {viser_url}")
-
-        # Create the iframe HTML code. Set width and height appropriately.
-        iframe_code = f'<iframe src="{viser_url}" width="100%" height="520px"></iframe>'
-
-        return iframe_code, f"Visualization running at {viser_url}"
-
-    gr.Examples(examples=examples,
-                inputs=[input_video, input_images],
-                outputs=[viser_output, log_output],  # Output to viser_output
-                fn=process_example,  # Use our wrapper function
-                cache_examples=False,
-                examples_per_page=50,
-                )
-
-
-    submit_btn.click(
-        process_example,  # Use the same wrapper function
-        [input_video, input_images],
-        [viser_output, log_output],  # Output to viser_output
-        concurrency_limit=1
-    )
-demo.queue(max_size=20).launch(show_error=True, share=True, server_port=7888, server_name="0.0.0.0")
examples/british_museum/images/336.jpg DELETED

Git LFS Details

  • SHA256: 726f9f540c702fac8141b13f7d4f395cc80e9287a1ad750db1007fefca5d096b
  • Pointer size: 131 Bytes
  • Size of remote file: 547 kB
examples/british_museum/images/515.jpg DELETED

Git LFS Details

  • SHA256: 26cb34f234357543a067d50fed56d908a82ea23fffb56ae1aa44356a2f70e6e8
  • Pointer size: 131 Bytes
  • Size of remote file: 296 kB
examples/british_museum/images/599.jpg DELETED

Git LFS Details

  • SHA256: d3fed69d63ba0a37737b4c51cb2096cca4c1424e5942708560d4975f69c8c3e8
  • Pointer size: 131 Bytes
  • Size of remote file: 423 kB
examples/british_museum/images/632.jpg DELETED

Git LFS Details

  • SHA256: c8954001ee0182b4b3e7c9da0a47db1943177d685cd1b10020cdfb0763066dbc
  • Pointer size: 131 Bytes
  • Size of remote file: 561 kB
examples/british_museum/images/767.jpg DELETED

Git LFS Details

  • SHA256: 5ef89be7be64f4477b1d51d74cdfaef8e4d35dbd3c4b966f72bc5d3c4f41a43f
  • Pointer size: 131 Bytes
  • Size of remote file: 355 kB
examples/british_museum/images/886.jpg DELETED

Git LFS Details

  • SHA256: 42413737d4f1511630140696f0ba0a72b476ca61537f854f7707c7996d1a26eb
  • Pointer size: 131 Bytes
  • Size of remote file: 339 kB
examples/cake_single/images/frame000020.jpg DELETED

Git LFS Details

  • SHA256: 78e72404f2b0fd05ec71345f9ed743fe330dde11ca987f64e271486d943f17da
  • Pointer size: 130 Bytes
  • Size of remote file: 30.9 kB
examples/llff_horns_single/images/017.png DELETED

Git LFS Details

  • SHA256: 9f43da76f936be3aa58ed06e68868b38ea7576b78e2a0e228585565410940ca3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.34 MB
examples/room/images/{4.png → no_overlap_1.png} RENAMED
File without changes
examples/room/images/{IMG_1506.HEIC.JPG → no_overlap_2.HEIC.JPG} RENAMED
File without changes
examples/room/images/{IMG_1507.HEIC.JPG → no_overlap_3.HEIC.JPG} RENAMED
File without changes
examples/room/images/{IMG_1508.HEIC.JPG → no_overlap_4.HEIC.JPG} RENAMED
File without changes
examples/room/images/{IMG_1509.HEIC.JPG → no_overlap_5.HEIC.JPG} RENAMED
File without changes
examples/room/images/{IMG_1510.HEIC.JPG → no_overlap_6.HEIC.JPG} RENAMED
File without changes
examples/room/images/{IMG_1511.HEIC.JPG → no_overlap_7.HEIC.JPG} RENAMED
File without changes
examples/room/images/{IMG_1512.HEIC.JPG → no_overlap_8.HEIC.JPG} RENAMED
File without changes
examples/{british_museum/images/210.jpg → single_cartoon/images/model_was_never_trained_on_single_image_or_cartoon.jpg} RENAMED
File without changes
examples/{british_museum/images/069.jpg → single_oil_painting/images/model_was_never_trained_on_single_image_or_oil_painting.png} RENAMED
File without changes
examples/statue/images/000.jpg DELETED

Git LFS Details

  • SHA256: aca81ede79384e90bbe057a951b71fbb9bc709a3191bb2bb54a05019029360af
  • Pointer size: 131 Bytes
  • Size of remote file: 661 kB
examples/statue/images/002.jpg DELETED

Git LFS Details

  • SHA256: 18fa58af38b7c31420e818700a58428e599f2950e3e4b119071ebb975ab04a13
  • Pointer size: 131 Bytes
  • Size of remote file: 615 kB
examples/statue/images/004.jpg DELETED

Git LFS Details

  • SHA256: 5108b2de57cf17898d0ed31c5e42d5a084232de15cc6124d01143534f27fa9e3
  • Pointer size: 131 Bytes
  • Size of remote file: 589 kB
examples/statue/images/006.jpg DELETED

Git LFS Details

  • SHA256: 976c7d34efbd942d23e3ccea1ffdd213c09388a6986e6f65854c515c40aeeae7
  • Pointer size: 131 Bytes
  • Size of remote file: 646 kB
examples/statue/images/008.jpg DELETED

Git LFS Details

  • SHA256: 891202fc06db30afa824988a6974f40d4495bb87981f9bee861a09ea42c6b3f7
  • Pointer size: 131 Bytes
  • Size of remote file: 647 kB
examples/statue/images/010.jpg DELETED

Git LFS Details

  • SHA256: 89266e9752da89b6969c5ef8ddfd64a2adc389d4ae0db90bafb2478efe5d87bc
  • Pointer size: 131 Bytes
  • Size of remote file: 609 kB
examples/statue/images/012.jpg DELETED

Git LFS Details

  • SHA256: 3a1a5512af7bb65cd45b925be615d64c7d52b4bfa1e5bd48e9ee199729a148ca
  • Pointer size: 131 Bytes
  • Size of remote file: 627 kB
examples/statue/images/014.jpg DELETED

Git LFS Details

  • SHA256: f4840c70fea2bf2663d766a2ddfb57ec12bf8dc0a5cf2797573be9dffc710a0a
  • Pointer size: 131 Bytes
  • Size of remote file: 661 kB
examples/statue/images/016.jpg DELETED

Git LFS Details

  • SHA256: af8e4265f7d1a1d3adbdd9831b921e7363fc3447d267dd169d7d7b6e000f7160
  • Pointer size: 131 Bytes
  • Size of remote file: 682 kB
examples/{british_museum/images/134.jpg → videos/single_cartoon.mp4} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d460a3f14ed3a7bcfc270c0caf47bc0e0f051233cc29fcde479f8ef75c369be
-size 495698
+oid sha256:b5418685a991dc3dba680cc1e2708214f479fc658ddb712344edc4274d373548
+size 228582
examples/{british_museum/images/192.jpg → videos/single_oil_painting.mp4} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6fd399b3494430562295cbf3ed3140822ee74ef15e5e6ec532213f86df455dc
-size 451018
+oid sha256:97c3ab94b7970cbe7960eb2bedf8a38343a909db035d4776aff5ba51cb3daa65
+size 794368
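The three-line bodies in these renames are standard git-lfs pointer files, which is why only the oid and size change when an image pointer is repurposed as a video pointer. If you want to audit pointer sizes across the repo, a small parser sketch (parse_lfs_pointer is a made-up helper, Python 3.9+):

def parse_lfs_pointer(text):
    """Parse the three-line git-lfs pointer format into a dict."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

# The single_cartoon.mp4 pointer from the hunk above:
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:b5418685a991dc3dba680cc1e2708214f479fc658ddb712344edc4274d373548\n"
    "size 228582\n"
)
print(parse_lfs_pointer(pointer))  # size_bytes=228582, i.e. ~229 kB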
gradio_util.py CHANGED
@@ -7,16 +7,12 @@ import gradio as gr
 import numpy as np
 import matplotlib
 from scipy.spatial.transform import Rotation
+import copy
+import cv2
+import os

-# except:
-#     print(
-#         f"Failed to import packages for Gradio visualization. Please disable gradio visualization"
-#     )
-
-
-
-def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all", mask_black_bg=False, show_cam=True) -> trimesh.Scene:
+
+def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all", mask_black_bg=False, show_cam=True, mask_sky=False, target_dir=None) -> trimesh.Scene:
     """
     Converts VGG SFM predictions to a 3D scene represented as a GLB.
@@ -29,6 +25,10 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
     # Convert predictions to numpy arrays
     # pred_extrinsic_list', 'pred_world_points', 'pred_world_points_conf', 'images', 'last_pred_extrinsic

+    if conf_thres is None:
+        conf_thres = 0.0
+
     print("Building GLB scene")
     selected_frame_idx = None
     if filter_by_frames != "all":
@@ -36,7 +36,6 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
             # Extract the index part before the colon
             selected_frame_idx = int(filter_by_frames.split(":")[0])
         except (ValueError, IndexError):
-            # If parsing fails, default to using all frames
             pass

     pred_world_points = predictions["pred_world_points"][0]  # remove batch dimension
@@ -45,6 +44,48 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
     last_pred_extrinsic = predictions["last_pred_extrinsic"][0]


+    if mask_sky:
+        if target_dir is not None:
+            import onnxruntime
+            skyseg_session = None
+            target_dir_images = target_dir + "/images"
+            image_list = sorted(os.listdir(target_dir_images))
+            sky_mask_list = []
+
+            # Get the shape of pred_world_points_conf to match
+            S, H, W = pred_world_points_conf.shape
+
+            for i, image_name in enumerate(image_list):
+                image_filepath = os.path.join(target_dir_images, image_name)
+                mask_filepath = os.path.join(target_dir, "sky_masks", image_name)
+
+                # Check if mask already exists
+                if os.path.exists(mask_filepath):
+                    # Load existing mask
+                    sky_mask = cv2.imread(mask_filepath, cv2.IMREAD_GRAYSCALE)
+                else:
+                    # Generate new mask
+                    if skyseg_session is None:
+                        skyseg_session = onnxruntime.InferenceSession("skyseg.onnx")
+                    sky_mask = segment_sky(image_filepath, skyseg_session, mask_filepath)
+
+                # Resize mask to match H×W if needed
+                if sky_mask.shape[0] != H or sky_mask.shape[1] != W:
+                    sky_mask = cv2.resize(sky_mask, (W, H))
+
+                sky_mask_list.append(sky_mask)
+
+            # Convert list to numpy array with shape S×H×W
+            sky_mask_array = np.array(sky_mask_list)
+
+            # Apply sky mask to confidence scores
+            sky_mask_binary = (sky_mask_array > 0.01).astype(np.float32)
+            pred_world_points_conf = pred_world_points_conf * sky_mask_binary
+
     if selected_frame_idx is not None:
         pred_world_points = pred_world_points[selected_frame_idx][None]
         pred_world_points_conf = pred_world_points_conf[selected_frame_idx][None]
@@ -294,3 +335,55 @@ def compute_camera_faces(cone_shape: trimesh.Trimesh) -> np.ndarray:

     faces_list += [(v3, v2, v1) for v1, v2, v3 in faces_list]
     return np.array(faces_list)
+
+
+def segment_sky(image_path, onnx_session, mask_filename=None):
+    assert mask_filename is not None
+    image = cv2.imread(image_path)
+
+    result_map = run_skyseg(onnx_session, [320, 320], image)
+    # Resize the result_map to the original image size
+    result_map_original = cv2.resize(result_map, (image.shape[1], image.shape[0]))
+
+    output_mask = np.zeros_like(result_map_original)
+    output_mask[result_map_original < 1] = 1
+    output_mask = output_mask.astype(np.uint8) * 255
+    os.makedirs(os.path.dirname(mask_filename), exist_ok=True)
+    cv2.imwrite(mask_filename, output_mask)
+    return output_mask
+
+
+def run_skyseg(onnx_session, input_size, image):
+    # Pre-process: resize, BGR -> RGB, transpose, PyTorch standardization, float32 cast
+    temp_image = copy.deepcopy(image)
+    resize_image = cv2.resize(temp_image, dsize=(input_size[0], input_size[1]))
+    x = cv2.cvtColor(resize_image, cv2.COLOR_BGR2RGB)
+    x = np.array(x, dtype=np.float32)
+    mean = [0.485, 0.456, 0.406]
+    std = [0.229, 0.224, 0.225]
+    x = (x / 255 - mean) / std
+    x = x.transpose(2, 0, 1)
+    x = x.reshape(-1, 3, input_size[0], input_size[1]).astype('float32')
+
+    # Inference
+    input_name = onnx_session.get_inputs()[0].name
+    output_name = onnx_session.get_outputs()[0].name
+    onnx_result = onnx_session.run([output_name], {input_name: x})
+
+    # Post-process: squeeze, min-max normalize to [0, 255], cast to uint8
+    onnx_result = np.array(onnx_result).squeeze()
+    min_value = np.min(onnx_result)
+    max_value = np.max(onnx_result)
+    onnx_result = (onnx_result - min_value) / (max_value - min_value)
+    onnx_result *= 255
+    onnx_result = onnx_result.astype('uint8')
+
+    return onnx_result
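For orientation, here is how the new sky-masking path fits together end to end: segment_sky runs the ONNX model once per frame, caches the mask under <target_dir>/sky_masks, and demo_predictions_to_glb zeroes the confidence of sky pixels before thresholding. A minimal standalone sketch under the same assumptions (skyseg.onnx in the working directory, frames under <target_dir>/images; the run-directory name is hypothetical):

import os
import numpy as np
import onnxruntime
from gradio_util import segment_sky  # helper added in this commit

target_dir = "input_images_20240101_000000"  # hypothetical run directory
image_dir = os.path.join(target_dir, "images")

session = onnxruntime.InferenceSession("skyseg.onnx")
masks = []
for name in sorted(os.listdir(image_dir)):
    # segment_sky writes the mask to disk and returns uint8 {0, 255},
    # where 255 marks non-sky pixels whose confidence should be kept.
    mask_path = os.path.join(target_dir, "sky_masks", name)
    masks.append(segment_sky(os.path.join(image_dir, name), session, mask_path))

# Stand-in for pred_world_points_conf with shape (S, H, W); sky pixels drop to 0
conf = np.random.rand(len(masks), *masks[0].shape).astype(np.float32)
conf *= (np.array(masks) > 0.01).astype(np.float32)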
requirements.txt CHANGED
@@ -9,8 +9,7 @@ numpy==1.26.3
 viser
 trimesh
 matplotlib
-
-
+onnxruntime


 # accelerate==0.24.0
skyseg.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab9c34c64c3d821220a2886a4a06da4642ffa14d5b30e8d5339056a089aa1d39
+size 175997079
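skyseg.onnx is committed as an LFS pointer, so a checkout without `git lfs pull` leaves a ~130-byte text file in its place, and onnxruntime will fail to load it. A quick sanity-check sketch before launching the demo:

import os
import onnxruntime

MODEL = "skyseg.onnx"

# An unfetched LFS pointer is a tiny text file, not the ~176 MB model.
size = os.path.getsize(MODEL)
assert size > 1_000_000, f"{MODEL} is {size} bytes; looks like an LFS pointer, run `git lfs pull`"

session = onnxruntime.InferenceSession(MODEL)
inp = session.get_inputs()[0]
print(inp.name, inp.shape)  # run_skyseg above feeds a (1, 3, 320, 320) float32 tensor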