JianyuanWang committed on
Commit
b19c7bf
1 Parent(s): 27a6ae5
app.py CHANGED
@@ -42,7 +42,7 @@ def vggsfm_demo(
42
 
43
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
44
 
45
- max_input_image = 20
46
 
47
  target_dir = f"input_images_{timestamp}"
48
  if os.path.exists(target_dir):
@@ -203,7 +203,7 @@ with gr.Blocks() as demo:
203
  <li>upload the images (.jpg, .png, etc.), or </li>
204
  <li>upload a video (.mp4, .mov, etc.) </li>
205
  </ul>
206
- <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract <strong> 1 image frame per second from the input video </strong>. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
207
  <p>SfM methods are designed for <strong> rigid/static reconstruction </strong>. When dealing with dynamic/moving inputs, these methods may still work by focusing on the rigid parts of the scene. However, to ensure high-quality results, it is better to minimize the presence of moving objects in the input data. </p>
208
  <p>The reconstruction should typically take <strong> up to 90 seconds </strong>. If it takes longer, the input data is likely not well-conditioned. </p>
209
  <p>If you meet any problem, feel free to create an issue in our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
@@ -245,6 +245,7 @@ with gr.Blocks() as demo:
245
  cache_examples=True,
246
  )
247
 
 
248
  submit_btn.click(
249
  vggsfm_demo,
250
  [input_video, input_images, num_query_images, num_query_points],
 
42
 
43
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
44
 
45
+ max_input_image = 25
46
 
47
  target_dir = f"input_images_{timestamp}"
48
  if os.path.exists(target_dir):
 
203
  <li>upload the images (.jpg, .png, etc.), or </li>
204
  <li>upload a video (.mp4, .mov, etc.) </li>
205
  </ul>
206
+ <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract <strong> 1 image frame per second from the input video </strong>. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 25 image frames. </p>
207
  <p>SfM methods are designed for <strong> rigid/static reconstruction </strong>. When dealing with dynamic/moving inputs, these methods may still work by focusing on the rigid parts of the scene. However, to ensure high-quality results, it is better to minimize the presence of moving objects in the input data. </p>
208
  <p>The reconstruction should typically take <strong> up to 90 seconds </strong>. If it takes longer, the input data is likely not well-conditioned. </p>
209
  <p>If you meet any problem, feel free to create an issue in our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
 
245
  cache_examples=True,
246
  )
247
 
248
+
249
  submit_btn.click(
250
  vggsfm_demo,
251
  [input_video, input_images, num_query_images, num_query_points],
vggsfm_code/vggsfm/models/triangulator.py CHANGED
@@ -323,7 +323,7 @@ class Triangulator(nn.Module):
323
  # We adopt LORANSAC here again
324
 
325
  best_triangulated_points, best_inlier_num, best_inlier_mask = triangulate_tracks(
326
- extrinsics, tracks_normalized_refined, track_vis=pred_vis, track_score=pred_score, max_ransac_iters=128
327
  )
328
 
329
  # Determine valid tracks based on inlier numbers
 
323
  # We adopt LORANSAC here again
324
 
325
  best_triangulated_points, best_inlier_num, best_inlier_mask = triangulate_tracks(
326
+ extrinsics, tracks_normalized_refined, track_vis=pred_vis, track_score=pred_score
327
  )
328
 
329
  # Determine valid tracks based on inlier numbers
vggsfm_code/vggsfm/utils/triangulation.py CHANGED
@@ -755,7 +755,7 @@ def iterative_global_BA(
755
 
756
  # triangulate tracks by LORANSAC
757
  best_triangulated_points, best_inlier_num, best_inlier_mask = triangulate_tracks(
758
- extrinsics, tracks_normalized_refined, track_vis=pred_vis, track_score=pred_score, max_ransac_iters=128
759
  )
760
 
761
  best_triangulated_points[valid_tracks] = points3D_opt
 
755
 
756
  # triangulate tracks by LORANSAC
757
  best_triangulated_points, best_inlier_num, best_inlier_mask = triangulate_tracks(
758
+ extrinsics, tracks_normalized_refined, track_vis=pred_vis, track_score=pred_score
759
  )
760
 
761
  best_triangulated_points[valid_tracks] = points3D_opt
vggsfm_code/vggsfm/utils/triangulation_helpers.py CHANGED
@@ -384,7 +384,7 @@ def generate_combinations(N):
384
  return comb_array
385
 
386
 
387
- def local_refinement_tri(points1, extrinsics, inlier_mask, sorted_indices, lo_num=50):
388
  """
389
  Local Refinement for triangulation
390
  """
@@ -392,7 +392,6 @@ def local_refinement_tri(points1, extrinsics, inlier_mask, sorted_indices, lo_nu
392
  batch_index = torch.arange(B).unsqueeze(-1).expand(-1, lo_num)
393
 
394
  points1_expand = points1.unsqueeze(1).expand(-1, lo_num, -1, -1)
395
- extrinsics_expand = extrinsics.unsqueeze(1).expand(-1, lo_num, -1, -1, -1)
396
 
397
  # The sets selected for local refinement
398
  lo_indices = sorted_indices[:, :lo_num]
@@ -402,18 +401,38 @@ def local_refinement_tri(points1, extrinsics, inlier_mask, sorted_indices, lo_nu
402
  lo_points1 = torch.zeros_like(points1_expand)
403
  lo_points1[lo_mask] = points1_expand[lo_mask]
404
 
405
- lo_points1 = lo_points1.reshape(B * lo_num, N, -1)
406
- lo_mask = lo_mask.reshape(B * lo_num, N)
407
- lo_extrinsics = extrinsics_expand.reshape(B * lo_num, N, 3, 4)
408
-
409
- # triangulate the inliers
410
- triangulated_points, tri_angles, invalid_che_mask = triangulate_multi_view_point_batched(
411
- lo_extrinsics, lo_points1, mask=lo_mask, compute_tri_angle=True, check_cheirality=True
412
- )
413
 
414
- triangulated_points = triangulated_points.reshape(B, lo_num, 3)
415
- tri_angles = tri_angles.reshape(B, lo_num, -1)
416
 
417
- invalid_che_mask = invalid_che_mask.reshape(B, lo_num)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
  return triangulated_points, tri_angles, invalid_che_mask
 
384
  return comb_array
385
 
386
 
387
+ def local_refinement_tri(points1, extrinsics, inlier_mask, sorted_indices, lo_num=50, low_mem=True):
388
  """
389
  Local Refinement for triangulation
390
  """
 
392
  batch_index = torch.arange(B).unsqueeze(-1).expand(-1, lo_num)
393
 
394
  points1_expand = points1.unsqueeze(1).expand(-1, lo_num, -1, -1)
 
395
 
396
  # The sets selected for local refinement
397
  lo_indices = sorted_indices[:, :lo_num]
 
401
  lo_points1 = torch.zeros_like(points1_expand)
402
  lo_points1[lo_mask] = points1_expand[lo_mask]
403
 
 
 
 
 
 
 
 
 
404
 
 
 
405
 
406
+ if low_mem:
407
+ all_triangulated_points = []
408
+ all_tri_angles = []
409
+ all_invalid_che_mask = []
410
+
411
+ for loidx in range(lo_num):
412
+ triangulated_points, tri_angles, invalid_che_mask = triangulate_multi_view_point_batched(
413
+ extrinsics, lo_points1[:, loidx], mask=lo_mask[:, loidx], compute_tri_angle=True, check_cheirality=True
414
+ )
415
+ # Append the outputs to the respective lists
416
+ all_triangulated_points.append(triangulated_points[:, None])
417
+ all_tri_angles.append(tri_angles[:, None])
418
+ all_invalid_che_mask.append(invalid_che_mask[:,None])
419
+
420
+ triangulated_points = torch.cat(all_triangulated_points, dim=1)
421
+ tri_angles = torch.cat(all_tri_angles, dim=1)
422
+ invalid_che_mask = torch.cat(all_invalid_che_mask, dim=1)
423
+ else:
424
+ extrinsics_expand = extrinsics.unsqueeze(1).expand(-1, lo_num, -1, -1, -1)
425
+ lo_points1 = lo_points1.reshape(B * lo_num, N, -1)
426
+ lo_mask = lo_mask.reshape(B * lo_num, N)
427
+ lo_extrinsics = extrinsics_expand.reshape(B * lo_num, N, 3, 4)
428
+
429
+ # triangulate the inliers
430
+ triangulated_points, tri_angles, invalid_che_mask = triangulate_multi_view_point_batched(
431
+ lo_extrinsics, lo_points1, mask=lo_mask, compute_tri_angle=True, check_cheirality=True
432
+ )
433
+
434
+ triangulated_points = triangulated_points.reshape(B, lo_num, 3)
435
+ tri_angles = tri_angles.reshape(B, lo_num, -1)
436
+ invalid_che_mask = invalid_che_mask.reshape(B, lo_num)
437
 
438
  return triangulated_points, tri_angles, invalid_che_mask