Commit 8b29fee (parent: e038053): update

Files changed:
- .gitattributes +1 -0
- app.py +78 -49
- examples/room/images/IMG_1507.HEIC.JPG.JPG +0 -3
- examples/room/images/IMG_1508.HEIC.JPG.JPG +0 -3
- examples/room/images/IMG_1509.HEIC.JPG.JPG +0 -3
- examples/room/images/IMG_1510.HEIC.JPG.JPG +0 -3
- examples/room/images/IMG_1511.HEIC.JPG.JPG +0 -3
- examples/room/images/IMG_1512.HEIC.JPG.JPG +0 -3
- examples/{room/images/IMG_1506.HEIC.JPG.JPG → videos/room_video.mp4} +2 -2
- gradio_util.py +24 -23
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 examples/** filter=lfs diff=lfs merge=lfs -text
+examples/videos/room_video.mp4 filter=lfs diff=lfs merge=lfs -text
app.py
CHANGED
@@ -22,20 +22,7 @@ import spaces
 
 
 
-
-# def get_free_port():
-#     """Get a free port using socket."""
-#     # return 80
-#     # return 8080
-#     # return 10088 # for debugging
-#     # return 7860
-#     # return 7888
-#     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-#         s.bind(('', 0))
-#         port = s.getsockname()[1]
-#         return port
-
-
+print("Loading model")
 
 cfg_file = "config/base.yaml"
 cfg = OmegaConf.load(cfg_file)
@@ -50,16 +37,18 @@ if "vggt_model" in pretrain_model:
 else:
     vggt_model.load_state_dict(pretrain_model, strict=True)
 
+print("Model loaded")
 
 # @torch.inference_mode()
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=120)
 def vggt_demo(
     input_video,
     input_image,
     conf_thres=3.0,
     frame_filter="all",
     mask_black_bg=False,
+    show_cam=True
 ):
     start_time = time.time()
     gc.collect()
@@ -133,10 +122,10 @@ def vggt_demo(
     np.savez(prediction_save_path, **predictions)
 
 
-    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}.glb"
+    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
 
 
-    glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg)
+    glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam)
     glbscene.export(file_obj=glbfile)
 
     del predictions
@@ -155,9 +144,19 @@ def vggt_demo(
     log = "Success. Waiting for visualization."
     return glbfile, log, target_dir, gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True)
 
+def clear_fields():
+    """
+    Return None for reconstruction_output and target_dir_output
+    (and optionally reset frame_filter to "All" or something else if needed).
+    """
+    return None, None
+
 
 
-def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg):
+def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam):
+    # Return early if target_dir is None or "None"
+    if target_dir is None or target_dir == "None":
+        return None, "No reconstruction available. Please run 'Reconstruct' first.", None
 
     loaded = np.load(f"{target_dir}/predictions.npz", allow_pickle=True)
     # predictions = np.load(f"{target_dir}/predictions.npz", allow_pickle=True)
@@ -165,10 +164,10 @@ def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg):
     # for key in predictions.files: print(key)
     predictions = {key: loaded[key] for key in loaded.keys()}
 
-    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}.glb"
+    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
 
     if not os.path.exists(glbfile):
-        glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg)
+        glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam)
         glbscene.export(file_obj=glbfile)
     return glbfile, "Updating Visualization", target_dir
 
@@ -198,6 +197,7 @@ drums_video = "examples/videos/drums_video.mp4"
 
 kitchen_video = "examples/videos/kitchen_video.mp4"
 
+room_video = "examples/videos/room_video.mp4"
 ###########################################################################################
 apple_images = glob.glob(f'examples/apple/images/*')
 bonsai_images = glob.glob(f'examples/bonsai/images/*')
@@ -216,8 +216,7 @@ statue_images = glob.glob(f'examples/statue/images/*')
 
 drums_images = glob.glob(f'examples/drums/images/*')
 kitchen_images = glob.glob(f'examples/kitchen/images/*')
-
-
+room_images = glob.glob(f'examples/room/images/*')
 
 ###########################################################################################
 
@@ -256,58 +255,88 @@ with gr.Blocks() as demo:
     with gr.Row():
         conf_thres = gr.Slider(minimum=0.1, maximum=10.0, value=2.0, step=0.1, label="Conf Thres")
         frame_filter = gr.Dropdown(choices=["All"], value="All", label="Show Points from Frame")
-
+        with gr.Column():
+            show_cam = gr.Checkbox(label="Show Camera", value=True)
+            mask_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
 
     log_output = gr.Textbox(label="Log")
-    # Add a hidden textbox for target_dir
-    target_dir_output = gr.Textbox(label="Target Dir", visible=False)
+    # Add a hidden textbox for target_dir with default value "None"
+    target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
+
+
 
     with gr.Row():
         submit_btn = gr.Button("Reconstruct", scale=1)
-        revisual_btn = gr.Button("Update Visualization", scale=1)
+        # revisual_btn = gr.Button("Update Visualization", scale=1)
        clear_btn = gr.ClearButton([input_video, input_images, reconstruction_output, log_output, target_dir_output], scale=1) #Modified reconstruction_output
 
 
 
 
     examples = [
-        [
-        [
-        [
-        [
-
-        # [statue_video, statue_images],
-        # [drums_video, drums_images],
-        # [horns_video, horns_images, 1.5, "All", False],
-        # [apple_video, apple_images],
-        # [bonsai_video, bonsai_images],
+        [room_video, room_images, 1.0, "All", False, True],
+        [counter_video, counter_images, 1.5, "All", False, True],
+        [flower_video, flower_images, 1.5, "All", False, True],
+        [kitchen_video, kitchen_images, 3, "All", False, True],
+        [fern_video, fern_images, 1.5, "All", False, True],
     ]
 
     gr.Examples(examples=examples,
-                inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg],
+                inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam],
                 outputs=[reconstruction_output, log_output, target_dir_output, frame_filter], # Added frame_filter
                 fn=vggt_demo, # Use our wrapper function
-                cache_examples=
+                cache_examples=True,
                 examples_per_page=50,
     )
 
+
     submit_btn.click(
-
-        [
-        [reconstruction_output,
-
+        fn=clear_fields,
+        inputs=[],
+        outputs=[reconstruction_output, target_dir_output]
+    ).then(
+        fn=vggt_demo,
+        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam],
+        outputs=[reconstruction_output, log_output, target_dir_output, frame_filter]
    )
 
-
+
+    # submit_btn.click(
+    #     vggt_demo, # Use the same wrapper function
+    #     [input_video, input_images, conf_thres, frame_filter, mask_black_bg],
+    #     [reconstruction_output, log_output, target_dir_output, frame_filter], # Added frame_filter to outputs
+    #     # concurrency_limit=1
+    # )
+
+    # Add event handlers for automatic updates when parameters change
+    conf_thres.change(
         update_visualization,
-        [target_dir_output, conf_thres, frame_filter, mask_black_bg],
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
        [reconstruction_output, log_output, target_dir_output],
     )
-
-
-
-
-
+
+    frame_filter.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
+    )
+
+    mask_black_bg.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
+    )
+
+    show_cam.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
+    )
+
+    demo.queue(max_size=20).launch(show_error=True, share=True) #, share=True, server_port=7888, server_name="0.0.0.0")
+
+
 # share=True
 # demo.queue(max_size=20, concurrency_count=1).launch(debug=True, share=True)
 ########################################################################################################################
+
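Note on the reworked submit wiring: it relies on Gradio event chaining, where .click() returns a listener whose .then() runs a second callback only after the first one has finished, so the viewer and the hidden target-dir textbox are blanked before the slow reconstruction starts. A minimal self-contained sketch of the same pattern, with placeholder component names and a stub in place of vggt_demo:

import gradio as gr

def clear_fields():
    # Blank the 3D viewer and the hidden target-dir textbox
    return None, None

def run_demo(video):
    # Stub standing in for vggt_demo: returns (scene file, log, target dir)
    return None, "Success.", "/tmp/run_0"

with gr.Blocks() as demo:
    input_video = gr.Video()
    viewer = gr.Model3D()
    log = gr.Textbox(label="Log")
    target_dir = gr.Textbox(visible=False, value="None")
    btn = gr.Button("Reconstruct")

    # Clear first; reconstruct only after the clear has been applied.
    btn.click(fn=clear_fields, inputs=[], outputs=[viewer, target_dir]).then(
        fn=run_demo, inputs=[input_video], outputs=[viewer, log, target_dir]
    )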
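Note on the GLB caching: both vggt_demo and update_visualization key the exported GLB filename on every rendering parameter, so each combination of conf_thres, frame_filter, mask_black_bg and show_cam is exported once and then re-served from disk by the .change() handlers. A sketch of that lookup under the same naming scheme (the helper name cached_glb is illustrative, not part of the diff):

import os
from gradio_util import demo_predictions_to_glb

def cached_glb(target_dir, predictions, conf_thres, frame_filter, mask_black_bg, show_cam):
    # One file per parameter combination; '.' in frame labels is replaced
    # so it cannot be mistaken for a file extension.
    name = f"glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
    glbfile = os.path.join(target_dir, name)
    if not os.path.exists(glbfile):
        scene = demo_predictions_to_glb(
            predictions, conf_thres=conf_thres, filter_by_frames=frame_filter,
            mask_black_bg=mask_black_bg, show_cam=show_cam)
        scene.export(file_obj=glbfile)
    return glbfile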
examples/room/images/IMG_1507.HEIC.JPG.JPG
DELETED (Git LFS file)
examples/room/images/IMG_1508.HEIC.JPG.JPG
DELETED (Git LFS file)
examples/room/images/IMG_1509.HEIC.JPG.JPG
DELETED (Git LFS file)
examples/room/images/IMG_1510.HEIC.JPG.JPG
DELETED (Git LFS file)
examples/room/images/IMG_1511.HEIC.JPG.JPG
DELETED (Git LFS file)
examples/room/images/IMG_1512.HEIC.JPG.JPG
DELETED (Git LFS file)
examples/{room/images/IMG_1506.HEIC.JPG.JPG → videos/room_video.mp4}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9d21d3682bb84bbeaa6a5b4d766998a083f9a1146fb2ef03761b7c7d98d83d42
+size 1179725
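Note on the rename: it swaps one Git LFS pointer for another; because .gitattributes routes examples/** (and now the video explicitly) through LFS, the checked-in file is only the three-line pointer shown, while the roughly 1.2 MB video lives in LFS storage. An illustrative parser for such a pointer file (the path assumes an un-smudged checkout, i.e. a clone without LFS installed):

def read_lfs_pointer(path):
    # Each pointer line is a "key value" pair:
    # version <spec-url> / oid sha256:<hash> / size <bytes>
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

info = read_lfs_pointer("examples/videos/room_video.mp4")
print(info["oid"], info["size"])  # sha256:9d21d368... 1179725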
gradio_util.py
CHANGED
@@ -16,7 +16,7 @@ from scipy.spatial.transform import Rotation
 
 
 
-def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all", mask_black_bg=False) -> trimesh.Scene:
+def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all", mask_black_bg=False, show_cam=True) -> trimesh.Scene:
     """
     Converts VGG SFM predictions to a 3D scene represented as a GLB.
 
@@ -67,18 +67,18 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
     colors_rgb = colors_rgb[conf_mask]
 
 
-
-
-
-
-
-
-
-
-
-
-
-
+
+    if vertices_3d is None or np.asarray(vertices_3d).size == 0:
+        vertices_3d = np.array([[1, 0, 0]])
+        colors_rgb = np.array([[255, 255, 255]])
+        scene_scale = 1
+    else:
+        # Calculate the 5th and 95th percentiles along each axis
+        lower_percentile = np.percentile(vertices_3d, 5, axis=0)
+        upper_percentile = np.percentile(vertices_3d, 95, axis=0)
+
+        # Calculate the diagonal length of the percentile bounding box
+        scene_scale = np.linalg.norm(upper_percentile - lower_percentile)
 
     colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
 
@@ -98,16 +98,17 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
     extrinsics_matrices[:, :3, :4] = camera_matrices
     extrinsics_matrices[:, 3, 3] = 1
 
-
-
-
-
-
-
-
-
-
-
+    if show_cam:
+        # Add camera models to the scene
+        for i in range(num_cameras):
+            world_to_camera = extrinsics_matrices[i]
+            camera_to_world = np.linalg.inv(world_to_camera)
+            rgba_color = colormap(i / num_cameras)
+            current_color = tuple(int(255 * x) for x in rgba_color[:3])
+
+            integrate_camera_into_scene(
+                scene_3d, camera_to_world, current_color, scene_scale
+            )
 
     # Align scene to the observation of the first camera
     scene_3d = apply_scene_alignment(scene_3d, extrinsics_matrices)
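Note on the scene-scale hunk: it guards against an empty point cloud (substituting a single dummy vertex and scale 1) and otherwise takes the diagonal of the 5th-95th percentile bounding box, so a handful of far-flung outlier points cannot inflate the size used for the camera models. The same logic as a standalone helper (the name robust_scene_scale is illustrative):

import numpy as np

def robust_scene_scale(vertices_3d):
    if vertices_3d is None or np.asarray(vertices_3d).size == 0:
        return 1.0
    lower = np.percentile(vertices_3d, 5, axis=0)   # per-axis 5th percentile
    upper = np.percentile(vertices_3d, 95, axis=0)  # per-axis 95th percentile
    return float(np.linalg.norm(upper - lower))     # diagonal of the inner box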
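Note on the show_cam hunk: when enabled, it inverts each 4x4 world-to-camera extrinsic to obtain a camera-to-world pose and gives each camera a distinct gist_rainbow color before calling the repo's integrate_camera_into_scene helper. A sketch of just the pose and color computation (the helper itself is assumed, not shown):

import numpy as np
import matplotlib

def camera_poses_and_colors(extrinsics_matrices):
    # extrinsics_matrices: (N, 4, 4) world-to-camera matrices
    colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
    num_cameras = len(extrinsics_matrices)
    poses = []
    for i in range(num_cameras):
        camera_to_world = np.linalg.inv(extrinsics_matrices[i])
        rgba = colormap(i / num_cameras)
        color = tuple(int(255 * c) for c in rgba[:3])  # 0-255 RGB triple
        poses.append((camera_to_world, color))
    return poses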