JianyuanWang committed
Commit 8b29fee · 1 parent: e038053
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 examples/** filter=lfs diff=lfs merge=lfs -text
+examples/videos/room_video.mp4 filter=lfs diff=lfs merge=lfs -text
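The new rule routes the added example video through Git LFS, alongside the existing `*.zst`, `*tfevents*`, and `examples/**` rules. As a rough, hypothetical illustration of how such glob rules select paths (Python's fnmatch only approximates real gitattributes matching, which treats `/` and `**` specially):

```python
# Hypothetical sketch, not part of the commit: approximate which paths the
# .gitattributes globs would route through Git LFS. fnmatch ignores git's
# special handling of '/' and '**', so this is an approximation only.
from fnmatch import fnmatch

lfs_patterns = ["*.zst", "*tfevents*", "examples/**", "examples/videos/room_video.mp4"]

def looks_lfs_tracked(path: str) -> bool:
    return any(fnmatch(path, pattern) for pattern in lfs_patterns)

print(looks_lfs_tracked("examples/videos/room_video.mp4"))  # True
print(looks_lfs_tracked("app.py"))                          # False
```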
app.py CHANGED
@@ -22,20 +22,7 @@ import spaces
 
 
 
-
-# def get_free_port():
-#     """Get a free port using socket."""
-#     # return 80
-#     # return 8080
-#     # return 10088 # for debugging
-#     # return 7860
-#     # return 7888
-#     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-#         s.bind(('', 0))
-#         port = s.getsockname()[1]
-#         return port
-
-
+print("Loading model")
 
 cfg_file = "config/base.yaml"
 cfg = OmegaConf.load(cfg_file)
@@ -50,16 +37,18 @@ if "vggt_model" in pretrain_model:
 else:
     vggt_model.load_state_dict(pretrain_model, strict=True)
 
+print("Model loaded")
 
 # @torch.inference_mode()
 
-@spaces.GPU(duration=240)
+@spaces.GPU(duration=120)
 def vggt_demo(
     input_video,
     input_image,
     conf_thres=3.0,
     frame_filter="all",
     mask_black_bg=False,
+    show_cam=True
 ):
     start_time = time.time()
     gc.collect()
@@ -133,10 +122,10 @@ def vggt_demo(
     np.savez(prediction_save_path, **predictions)
 
 
-    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}.glb"
+    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
 
 
-    glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg)
+    glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam)
     glbscene.export(file_obj=glbfile)
 
     del predictions
@@ -155,9 +144,19 @@ def vggt_demo(
     log = "Success. Waiting for visualization."
     return glbfile, log, target_dir, gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True)
 
+def clear_fields():
+    """
+    Return None for reconstruction_output and target_dir_output
+    (and optionally reset frame_filter to "All" or something else if needed).
+    """
+    return None, None
+
 
 
-def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg):
+def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam):
+    # Return early if target_dir is None or "None"
+    if target_dir is None or target_dir == "None":
+        return None, "No reconstruction available. Please run 'Reconstruct' first.", None
 
     loaded = np.load(f"{target_dir}/predictions.npz", allow_pickle=True)
     # predictions = np.load(f"{target_dir}/predictions.npz", allow_pickle=True)
@@ -165,10 +164,10 @@ def update_visualization(target_dir, conf_thres, frame_filter, mask_black_bg):
     # for key in predictions.files: print(key)
     predictions = {key: loaded[key] for key in loaded.keys()}
 
-    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}.glb"
+    glbfile = target_dir + f"/glbscene_{conf_thres}_{frame_filter.replace('.', '_')}_mask{mask_black_bg}_cam{show_cam}.glb"
 
     if not os.path.exists(glbfile):
-        glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg)
+        glbscene = demo_predictions_to_glb(predictions, conf_thres=conf_thres, filter_by_frames=frame_filter, mask_black_bg=mask_black_bg, show_cam=show_cam)
         glbscene.export(file_obj=glbfile)
     return glbfile, "Updating Visualization", target_dir
 
@@ -198,6 +197,7 @@ drums_video = "examples/videos/drums_video.mp4"
 
 kitchen_video = "examples/videos/kitchen_video.mp4"
 
+room_video = "examples/videos/room_video.mp4"
 ###########################################################################################
 apple_images = glob.glob(f'examples/apple/images/*')
 bonsai_images = glob.glob(f'examples/bonsai/images/*')
@@ -216,8 +216,7 @@ statue_images = glob.glob(f'examples/statue/images/*')
 
 drums_images = glob.glob(f'examples/drums/images/*')
 kitchen_images = glob.glob(f'examples/kitchen/images/*')
-
-
+room_images = glob.glob(f'examples/room/images/*')
 
 ###########################################################################################
 
@@ -256,58 +255,88 @@ with gr.Blocks() as demo:
     with gr.Row():
         conf_thres = gr.Slider(minimum=0.1, maximum=10.0, value=2.0, step=0.1, label="Conf Thres")
         frame_filter = gr.Dropdown(choices=["All"], value="All", label="Show Points from Frame")
-        mask_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
+        with gr.Column():
+            show_cam = gr.Checkbox(label="Show Camera", value=True)
+            mask_black_bg = gr.Checkbox(label="Filter Black Background", value=False)
 
     log_output = gr.Textbox(label="Log")
-    # Add a hidden textbox for target_dir
-    target_dir_output = gr.Textbox(label="Target Dir", visible=False)
+    # Add a hidden textbox for target_dir with default value "None"
+    target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")
+
+
 
     with gr.Row():
         submit_btn = gr.Button("Reconstruct", scale=1)
-        revisual_btn = gr.Button("Update Visualization", scale=1)
+        # revisual_btn = gr.Button("Update Visualization", scale=1)
         clear_btn = gr.ClearButton([input_video, input_images, reconstruction_output, log_output, target_dir_output], scale=1) #Modified reconstruction_output
 
 
 
 
     examples = [
-        [counter_video, counter_images, 1.5, "All", False],
-        [flower_video, flower_images, 1.5, "All", False],
-        [kitchen_video, kitchen_images, 3, "All", False],
-        [fern_video, fern_images, 1.5, "All", False],
-        # [person_video, person_images],
-        # [statue_video, statue_images],
-        # [drums_video, drums_images],
-        # [horns_video, horns_images, 1.5, "All", False],
-        # [apple_video, apple_images],
-        # [bonsai_video, bonsai_images],
+        [room_video, room_images, 1.0, "All", False, True],
+        [counter_video, counter_images, 1.5, "All", False, True],
+        [flower_video, flower_images, 1.5, "All", False, True],
+        [kitchen_video, kitchen_images, 3, "All", False, True],
+        [fern_video, fern_images, 1.5, "All", False, True],
     ]
 
     gr.Examples(examples=examples,
-        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg],
+        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam],
         outputs=[reconstruction_output, log_output, target_dir_output, frame_filter], # Added frame_filter
         fn=vggt_demo, # Use our wrapper function
-        cache_examples=False,
+        cache_examples=True,
         examples_per_page=50,
     )
 
+
     submit_btn.click(
-        vggt_demo, # Use the same wrapper function
-        [input_video, input_images, conf_thres, frame_filter, mask_black_bg],
-        [reconstruction_output, log_output, target_dir_output, frame_filter], # Added frame_filter to outputs
-        # concurrency_limit=1
+        fn=clear_fields,
+        inputs=[],
+        outputs=[reconstruction_output, target_dir_output]
+    ).then(
+        fn=vggt_demo,
+        inputs=[input_video, input_images, conf_thres, frame_filter, mask_black_bg, show_cam],
+        outputs=[reconstruction_output, log_output, target_dir_output, frame_filter]
    )
 
-    revisual_btn.click(
+
+    # submit_btn.click(
+    #     vggt_demo, # Use the same wrapper function
+    #     [input_video, input_images, conf_thres, frame_filter, mask_black_bg],
+    #     [reconstruction_output, log_output, target_dir_output, frame_filter], # Added frame_filter to outputs
+    #     # concurrency_limit=1
+    # )
+
+    # Add event handlers for automatic updates when parameters change
+    conf_thres.change(
         update_visualization,
-        [target_dir_output, conf_thres, frame_filter, mask_black_bg],
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
         [reconstruction_output, log_output, target_dir_output],
     )
-
-# demo.launch(debug=True, share=True)
-# demo.launch(server_name="0.0.0.0", server_port=8082, debug=True, share=False)
-# demo.queue(max_size=20).launch(show_error=True, share=True)
-demo.queue(max_size=20).launch(show_error=True) #, share=True, server_port=7888, server_name="0.0.0.0")
+
+    frame_filter.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
+    )
+
+    mask_black_bg.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
+    )
+
+    show_cam.change(
+        update_visualization,
+        [target_dir_output, conf_thres, frame_filter, mask_black_bg, show_cam],
+        [reconstruction_output, log_output, target_dir_output],
    )
+
+demo.queue(max_size=20).launch(show_error=True, share=True) #, share=True, server_port=7888, server_name="0.0.0.0")
+
+
 # share=True
 # demo.queue(max_size=20, concurrency_count=1).launch(debug=True, share=True)
 ########################################################################################################################
+
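Two patterns in this revision are worth noting: `submit_btn.click(...).then(...)` chains a cheap UI-reset step ahead of the GPU-bound reconstruction, and every visualization parameter is baked into the exported GLB filename so that `update_visualization` can reuse a previously written file instead of rebuilding the scene. A minimal standalone sketch of that filename-as-cache-key idea (names here are illustrative, not the app's API):

```python
# Minimal sketch (illustrative names, not the app's API) of the caching pattern
# used above: encode every visualization parameter in the exported filename, so
# toggling a control back to a previous combination reuses the file on disk.
import os

def glb_cache_path(target_dir, conf_thres, frame_filter, mask_black_bg, show_cam):
    # frame_filter may contain '.' (e.g. an image filename), which would confuse
    # the file extension, hence the replace -- mirroring the diff above
    safe_filter = frame_filter.replace(".", "_")
    return os.path.join(
        target_dir,
        f"glbscene_{conf_thres}_{safe_filter}_mask{mask_black_bg}_cam{show_cam}.glb",
    )

def get_or_export(target_dir, params, export_fn):
    path = glb_cache_path(target_dir, *params)
    if not os.path.exists(path):  # cache miss: build and export once
        export_fn(path)
    return path
```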
examples/room/images/IMG_1507.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: d7be333a2147ae3d118fefcb343bc29cac112272038c4edd354040210c61e297
  • Pointer size: 131 Bytes
  • Size of remote file: 231 kB
examples/room/images/IMG_1508.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: ec8d7b0e1ce14b62abc05586577bc930737273ab191d94b97a6075cbfbe78485
  • Pointer size: 131 Bytes
  • Size of remote file: 221 kB
examples/room/images/IMG_1509.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: 39f9b9c15bd38073112228c5ab50befbdcc6d5d77f4c3ebafbad0218d13122e3
  • Pointer size: 131 Bytes
  • Size of remote file: 242 kB
examples/room/images/IMG_1510.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: ef867329251646dd1a9a65e53d520abe1ae312f4b03020def00be1d82e079119
  • Pointer size: 131 Bytes
  • Size of remote file: 255 kB
examples/room/images/IMG_1511.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: e1f24a0d124f2a884a8f98a887f6d4c99ac8ed6251f25cd1e59a8ab287328d85
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
examples/room/images/IMG_1512.HEIC.JPG.JPG DELETED

Git LFS Details

  • SHA256: 127a830eb92e5a33ea4f01002ef12073e827789e47f4fb59910fa050793eba79
  • Pointer size: 131 Bytes
  • Size of remote file: 128 kB
examples/{room/images/IMG_1506.HEIC.JPG.JPG → videos/room_video.mp4} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd8dc207341579e75338323ad0bf3cd10a0f23bea8a60f1d4b49579f8c606fa0
-size 269173
+oid sha256:9d21d3682bb84bbeaa6a5b4d766998a083f9a1146fb2ef03761b7c7d98d83d42
+size 1179725
gradio_util.py CHANGED
@@ -16,7 +16,7 @@ from scipy.spatial.transform import Rotation
16
 
17
 
18
 
19
- def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all", mask_black_bg=False) -> trimesh.Scene:
20
  """
21
  Converts VGG SFM predictions to a 3D scene represented as a GLB.
22
 
@@ -67,18 +67,18 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
67
  colors_rgb = colors_rgb[conf_mask]
68
 
69
 
70
- # vertices_3d = predictions["points3D"].cpu().numpy()
71
- # colors_rgb = (predictions["points3D_rgb"].cpu().numpy() * 255).astype(
72
- # np.uint8
73
- # )
74
- # camera_matrices = predictions["extrinsics_opencv"].cpu().numpy()
75
-
76
- # Calculate the 5th and 95th percentiles along each axis
77
- lower_percentile = np.percentile(vertices_3d, 5, axis=0)
78
- upper_percentile = np.percentile(vertices_3d, 95, axis=0)
79
-
80
- # Calculate the diagonal length of the percentile bounding box
81
- scene_scale = np.linalg.norm(upper_percentile - lower_percentile)
82
 
83
  colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
84
 
@@ -98,16 +98,17 @@ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all",
98
  extrinsics_matrices[:, :3, :4] = camera_matrices
99
  extrinsics_matrices[:, 3, 3] = 1
100
 
101
- # Add camera models to the scene
102
- for i in range(num_cameras):
103
- world_to_camera = extrinsics_matrices[i]
104
- camera_to_world = np.linalg.inv(world_to_camera)
105
- rgba_color = colormap(i / num_cameras)
106
- current_color = tuple(int(255 * x) for x in rgba_color[:3])
107
-
108
- integrate_camera_into_scene(
109
- scene_3d, camera_to_world, current_color, scene_scale
110
- )
 
111
 
112
  # Align scene to the observation of the first camera
113
  scene_3d = apply_scene_alignment(scene_3d, extrinsics_matrices)
 
16
 
17
 
18
 
19
+ def demo_predictions_to_glb(predictions, conf_thres=3.0, filter_by_frames="all", mask_black_bg=False, show_cam=True) -> trimesh.Scene:
20
  """
21
  Converts VGG SFM predictions to a 3D scene represented as a GLB.
22
 
 
67
  colors_rgb = colors_rgb[conf_mask]
68
 
69
 
70
+
71
+ if vertices_3d is None or np.asarray(vertices_3d).size == 0:
72
+ vertices_3d = np.array([[1, 0, 0]])
73
+ colors_rgb = np.array([[255, 255, 255]])
74
+ scene_scale = 1
75
+ else:
76
+ # Calculate the 5th and 95th percentiles along each axis
77
+ lower_percentile = np.percentile(vertices_3d, 5, axis=0)
78
+ upper_percentile = np.percentile(vertices_3d, 95, axis=0)
79
+
80
+ # Calculate the diagonal length of the percentile bounding box
81
+ scene_scale = np.linalg.norm(upper_percentile - lower_percentile)
82
 
83
  colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
84
 
 
98
  extrinsics_matrices[:, :3, :4] = camera_matrices
99
  extrinsics_matrices[:, 3, 3] = 1
100
 
101
+ if show_cam:
102
+ # Add camera models to the scene
103
+ for i in range(num_cameras):
104
+ world_to_camera = extrinsics_matrices[i]
105
+ camera_to_world = np.linalg.inv(world_to_camera)
106
+ rgba_color = colormap(i / num_cameras)
107
+ current_color = tuple(int(255 * x) for x in rgba_color[:3])
108
+
109
+ integrate_camera_into_scene(
110
+ scene_3d, camera_to_world, current_color, scene_scale
111
+ )
112
 
113
  # Align scene to the observation of the first camera
114
  scene_3d = apply_scene_alignment(scene_3d, extrinsics_matrices)
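The reworked block guards against an empty point cloud and derives the scene scale from a percentile bounding box rather than the raw extent, so a handful of outlier points cannot blow up the rendered camera-mesh size. Restated as a standalone numpy sketch (the function name is mine, not the repo's):

```python
# Standalone restatement of the scale logic above (function name is illustrative).
# Using the 5th-95th percentile box diagonal instead of the full min/max extent
# keeps the scale robust to stray points far from the scene.
import numpy as np

def robust_scene_scale(vertices_3d):
    if vertices_3d is None or np.asarray(vertices_3d).size == 0:
        # Degenerate cloud: fall back to a single dummy vertex and unit scale,
        # mirroring the fallback in the diff.
        return np.array([[1.0, 0.0, 0.0]]), 1.0
    lower = np.percentile(vertices_3d, 5, axis=0)   # per-axis 5th percentile
    upper = np.percentile(vertices_3d, 95, axis=0)  # per-axis 95th percentile
    return np.asarray(vertices_3d), float(np.linalg.norm(upper - lower))

# Example: 1000 inlier points plus one far outlier barely moves the scale.
pts = np.vstack([np.random.randn(1000, 3), [[1e6, 1e6, 1e6]]])
_, scale = robust_scene_scale(pts)
print(f"scene scale ~ {scale:.2f}")
```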