app.py CHANGED
@@ -29,18 +29,14 @@ from mono.utils.visualization import save_val_imgs, create_html, save_raw_imgs,
  import cv2
  from tqdm import tqdm
  import numpy as np
- from PIL import Image
  import matplotlib.pyplot as plt

- from mono.utils.unproj_pcd import reconstruct_pcd, save_point_cloud
  from mono.utils.transform import gray_to_colormap
  from mono.utils.visualization import vis_surface_normal
  import gradio as gr
-
- import fire
- from mono.utils.unproj_pcd import reconstruct_pcd, save_point_cloud
- from datetime import datetime
- import time

  #torch.hub.download_url_to_file('https://images.unsplash.com/photo-1437622368342-7a3d73a34c8f', 'turtle.jpg')
  #torch.hub.download_url_to_file('https://images.unsplash.com/photo-1519066629447-267fffa62d4b', 'lions.jpg')
@@ -59,30 +55,27 @@ device = "cuda"
  model_large.to(device)
  model_small.to(device)

-
- outputs_dir = "./outs"
-
- def depth_normal(img_path, model_selection="vit-small"):
      if model_selection == "vit-small":
          model = model_small
          cfg = cfg_small
      elif model_selection == "vit-large":
          model = model_large
          cfg = cfg_large
-
      else:
-         raise NotImplementedError

-     img = Image.open(img_path)

      cv_image = np.array(img)
-     img = cv_image
      img = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
-     intrinsic = [1000.0, 1000.0, img.shape[1]/2, img.shape[0]/2]
      rgb_input, cam_models_stacks, pad, label_scale_factor = transform_test_data_scalecano(img, intrinsic, cfg.data_basic)

      with torch.no_grad():
-         pred_depth, pred_depth_scale, scale, output = get_prediction(
              model = model,
              input = rgb_input,
              cam_model = cam_models_stacks,
@@ -100,125 +93,267 @@ def depth_normal(img_path, model_selection="vit-small"):
      pred_depth = pred_depth.squeeze().cpu().numpy()
      pred_depth[pred_depth<0] = 0
      pred_color = gray_to_colormap(pred_depth)

      ##formatted = (output * 255 / np.max(output)).astype('uint8')

-     path_output_dir = os.path.splitext(os.path.basename(img_path))[0] + datetime.now().strftime('%Y%m%d-%H%M%S')
-     path_output_dir = os.path.join(path_output_dir, outputs_dir)
-     os.makedirs(path_output_dir, exist_ok=True)

-     name_base = os.path.splitext(os.path.basename(img_path))[0]

-     depth_np = pred_depth
-     normal_np = torch.nn.functional.interpolate(pred_normal, [img.shape[0], img.shape[1]], mode='bilinear').squeeze().cpu().numpy()
-     normal_np = normal_np.transpose(1,2,0)

-     pred_normal = pred_normal.squeeze()
-     if pred_normal.size(0) == 3:
-         pred_normal = pred_normal.permute(1,2,0)
-     pred_color_normal = vis_surface_normal(pred_normal)

-     depth_path = os.path.join(path_output_dir, f"{name_base}_depth.npy")
-     normal_path = os.path.join(path_output_dir, f"{name_base}_normal.npy")

-     np.save(normal_path, normal_np)
-     np.save(depth_path, depth_np)

-     ori_w = img.shape[1]
-     ori_h = img.shape[0]
-
-     img = Image.fromarray(pred_color)
-     #img = img.resize((int(300 * ori_w/ ori_h), 300))
-
-     img_normal = Image.fromarray(pred_color_normal)
-     #img_normal = img_normal.resize((int(300 * ori_w/ ori_h), 300))
-
-     return img, img_normal, [depth_path, normal_path]
-
- def reconstruction(img_path, files, focal_length, reconstructed_file):
-     img = Image.open(img_path)
-     cv_image = np.array(img)
-     img = cv_image

-     depth_np = np.load(files[0])
-     pcd = reconstruct_pcd(depth_np * focal_length / 1000, focal_length, focal_length, img.shape[1]/2, img.shape[0]/2)
-     pcd_path = files[0].replace('_depth.npy', '.ply')
-     save_point_cloud(pcd.reshape((-1, 3)), img.reshape(-1, 3), pcd_path)
-     return [pcd_path]

  title = "Metric3D"
- description = "Gradio demo for Metric3D which takes in a single image for computing metric depth and surface normal. To use it, simply upload your image, or click one of the examples to load them. Learn more from our paper linked below."
- article = "<p style='text-align: center'><a href='https://arxiv.org/pdf/2307.10984.pdf'>Metric3D: Towards Zero-shot Metric 3D Prediction from A Single Image</a> | <a href='https://github.com/YvanYin/Metric3D'>Github Repo</a></p>"

  examples = [
-     ["files/museum.jpg"],
-     ["files/terra.jpg"],
-     ["files/underwater.jpg"],
-     ["files/venue.jpg"]
  ]

- def run_demo():
-
-     _TITLE = '''Metric3Dv2: A versatile monocular geometric foundation model for zero-shot metric depth and surface normal estimation'''
-     _DESCRIPTION = description
-
-     with gr.Blocks(title=_TITLE) as demo:
-         with gr.Row():
-             with gr.Column(scale=1):
-                 gr.Markdown('# ' + _TITLE)
-                 gr.Markdown(_DESCRIPTION)
-         with gr.Row(variant='panel'):
-             with gr.Column(scale=1):
-                 #input_image = gr.Image(type='pil', label='Original Image')
-                 input_image = gr.Image(type='filepath', height=300, label='Input image')
-
-                 example_folder = os.path.join(os.path.dirname(__file__), "./files")
-                 example_fns = [os.path.join(example_folder, example) for example in os.listdir(example_folder)]
-                 gr.Examples(
-                     examples=example_fns,
-                     inputs=[input_image],
-                     cache_examples=False,
-                     label='Examples (click one of the images below to start)',
-                     examples_per_page=30
-                 )

-                 model_choice = gr.Dropdown(["vit-small", "vit-large"], label="Model", info="Select a model type", value="vit-small")
-                 run_btn = gr.Button('Predict', variant='primary', interactive=True)
-
-             with gr.Column(scale=1):
-                 depth = gr.Image(interactive=False, label="Depth")
-                 normal = gr.Image(interactive=False, label="Normal")
-
-                 with gr.Row():
-                     files = gr.Files(
-                         label = "Depth and Normal (numpy)",
-                         elem_id = "download",
-                         interactive=False,
-                     )
-
-                 with gr.Row():
-                     recon_btn = gr.Button('Focal Length Available? If Yes, Enter and Click Here for Metric 3D Reconstruction', variant='primary', interactive=True)
-                     focal_length = gr.Number(value=1000, label="Focal Length")
-
-                 with gr.Row():
-                     reconstructed_file = gr.Files(
-                         label = "3D pointclouds (plyfile)",
-                         elem_id = "download",
-                         interactive=False
-                     )
-
-         run_btn.click(fn=depth_normal,
-                       inputs=[input_image,
-                               model_choice],
-                       outputs=[depth, normal, files]
-                       )
-         recon_btn.click(fn=reconstruction,
-                         inputs=[input_image, files, focal_length],
-                         outputs=[reconstructed_file]
-                         )
-
-     demo.queue().launch(share=True, max_threads=80)
-
-
- if __name__ == '__main__':
-     fire.Fire(run_demo)

  import cv2
  from tqdm import tqdm
  import numpy as np
+ from PIL import Image, ExifTags
  import matplotlib.pyplot as plt

+ from mono.utils.unproj_pcd import reconstruct_pcd, save_point_cloud, ply_to_obj
  from mono.utils.transform import gray_to_colormap
  from mono.utils.visualization import vis_surface_normal
  import gradio as gr
+ import plotly.graph_objects as go

  #torch.hub.download_url_to_file('https://images.unsplash.com/photo-1437622368342-7a3d73a34c8f', 'turtle.jpg')
  #torch.hub.download_url_to_file('https://images.unsplash.com/photo-1519066629447-267fffa62d4b', 'lions.jpg')
  model_large.to(device)
  model_small.to(device)

+ def predict_depth_normal(img, model_selection="vit-small", fx=1000.0, fy=1000.0, state_cache={}):
      if model_selection == "vit-small":
          model = model_small
          cfg = cfg_small
      elif model_selection == "vit-large":
          model = model_large
          cfg = cfg_large
      else:
+         return None, None, None, None, state_cache, "Not implemented model."

+     if img is None:
+         return None, None, None, None, state_cache, "Please upload an image and wait for the upload to complete."
+

      cv_image = np.array(img)
      img = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
+     intrinsic = [fx, fy, img.shape[1]/2, img.shape[0]/2]
      rgb_input, cam_models_stacks, pad, label_scale_factor = transform_test_data_scalecano(img, intrinsic, cfg.data_basic)

      with torch.no_grad():
+         pred_depth, pred_depth_scale, scale, output, confidence = get_prediction(
              model = model,
              input = rgb_input,
              cam_model = cam_models_stacks,
      pred_depth = pred_depth.squeeze().cpu().numpy()
      pred_depth[pred_depth<0] = 0
      pred_color = gray_to_colormap(pred_depth)
+
+     pred_normal = torch.nn.functional.interpolate(pred_normal, [img.shape[0], img.shape[1]], mode='bilinear').squeeze()
+     pred_normal = pred_normal.permute(1,2,0)
+     pred_color_normal = vis_surface_normal(pred_normal)
+     pred_normal = pred_normal.cpu().numpy()

+     # Store depth and normal maps in state for potential 3D reconstruction
+     state_cache['depth'] = pred_depth
+     state_cache['normal'] = pred_normal
+     state_cache['img'] = img
+     state_cache['intrinsic'] = intrinsic
+     state_cache['confidence'] = confidence
+
+     # save depth and normal maps to .npy files
+     if 'save_dir' not in state_cache:
+         cache_id = np.random.randint(0, 100000000000)
+         while osp.exists(f'recon_cache/{cache_id:08d}'):
+             cache_id = np.random.randint(0, 100000000000)
+         state_cache['save_dir'] = f'recon_cache/{cache_id:08d}'
+         os.makedirs(state_cache['save_dir'], exist_ok=True)
+     depth_file = f"{state_cache['save_dir']}/depth.npy"
+     normal_file = f"{state_cache['save_dir']}/normal.npy"
+     np.save(depth_file, pred_depth)
+     np.save(normal_file, pred_normal)
+
      ##formatted = (output * 255 / np.max(output)).astype('uint8')
+     img = Image.fromarray(pred_color)
+     img_normal = Image.fromarray(pred_color_normal)
+     return img, depth_file, img_normal, normal_file, state_cache, "Success!"
+
+ def get_camera(img):
+     if img is None:
+         return None, None, None, "Please upload an image and wait for the upload to complete."
+     try:
+         exif = img.getexif()
+         exif.update(exif.get_ifd(ExifTags.IFD.Exif))
+     except:
+         exif = {}
+     sensor_width = exif.get(ExifTags.Base.FocalPlaneYResolution, None)
+     sensor_height = exif.get(ExifTags.Base.FocalPlaneXResolution, None)
+     focal_length = exif.get(ExifTags.Base.FocalLength, None)
+
+     # convert sensor size to mm, see https://photo.stackexchange.com/questions/40865/how-can-i-get-the-image-sensor-dimensions-in-mm-to-get-circle-of-confusion-from
+     w, h = img.size
+     sensor_width = w / sensor_width * 25.4 if sensor_width is not None else None
+     sensor_height = h / sensor_height * 25.4 if sensor_height is not None else None
+     focal_length = focal_length * 1.0 if focal_length is not None else None
+
+     message = "Success!"
+     if focal_length is None:
+         message = "Focal length not found in EXIF. Please manually input."
+     elif sensor_width is None and sensor_height is None:
+         sensor_width = 16
+         sensor_height = h / w * sensor_width
+         message = f"Sensor size not found in EXIF. Using {sensor_width}x{sensor_height:.2f} mm as default."
+
+     return sensor_width, sensor_height, focal_length, message
+
+ def get_intrinsic(img, sensor_width, sensor_height, focal_length):
+     if img is None:
+         return None, None, "Please upload an image and wait for the upload to complete."
+     if sensor_width is None or sensor_height is None or focal_length is None:
+         return 1000, 1000, "Insufficient information. Try detecting camera first or use default 1000 for fx and fy."
+     if sensor_width == 0 or sensor_height == 0 or focal_length == 0:
+         return 1000, 1000, "Insufficient information. Try detecting camera first or use default 1000 for fx and fy."

+     # calculate focal length in pixels
+     w, h = img.size
+     fx = w / sensor_width * focal_length if sensor_width is not None else None
+     fy = h / sensor_height * focal_length if sensor_height is not None else None

+     # if fx is None:
+     #     return fy, fy, "Sensor width not provided, using fy for both fx and fy"
+     # if fy is None:
+     #     return fx, fx, "Sensor height not provided, using fx for both fx and fy"

+     return fx, fy, "Success!"

+ def unprojection_pcd(state_cache):
+     depth_map = state_cache.get('depth', None)
+     normal_map = state_cache.get('normal', None)
+     img = state_cache.get('img', None)
+     intrinsic = state_cache.get('intrinsic', None)

+     if depth_map is None or img is None:
+         return None, "Please predict depth and normal first."

+     # # downsample/upsample the depth map to confidence map size
+     # confidence = state_cache.get('confidence', None)
+     # if confidence is not None:
+     #     H, W = confidence.shape
+     #     # intrinsic[0] *= W / depth_map.shape[1]
+     #     # intrinsic[1] *= H / depth_map.shape[0]
+     #     # intrinsic[2] *= W / depth_map.shape[1]
+     #     # intrinsic[3] *= H / depth_map.shape[0]
+     #     depth_map = cv2.resize(depth_map, (W, H), interpolation=cv2.INTER_LINEAR)
+     #     img = cv2.resize(img, (W, H), interpolation=cv2.INTER_LINEAR)
+
+     # # filter out depth map by confidence
+     # mask = confidence.cpu().numpy() > 0
+
+     # downsample the depth map if too large
+     if depth_map.shape[0] > 1080:
+         scale = 1080 / depth_map.shape[0]
+         depth_map = cv2.resize(depth_map, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
+         img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
+         intrinsic = [intrinsic[0]*scale, intrinsic[1]*scale, intrinsic[2]*scale, intrinsic[3]*scale]
+
+     if 'save_dir' not in state_cache:
+         cache_id = np.random.randint(0, 100000000000)
+         while osp.exists(f'recon_cache/{cache_id:08d}'):
+             cache_id = np.random.randint(0, 100000000000)
+         state_cache['save_dir'] = f'recon_cache/{cache_id:08d}'
+         os.makedirs(state_cache['save_dir'], exist_ok=True)
+
+     pcd_ply = f"{state_cache['save_dir']}/output.ply"
+     pcd_obj = pcd_ply.replace(".ply", ".obj")
+
+     pcd = reconstruct_pcd(depth_map, intrinsic[0], intrinsic[1], intrinsic[2], intrinsic[3])
+     # if mask is not None:
+     #     pcd_filtered = pcd[mask]
+     #     img_filtered = img[mask]
+     pcd_filtered = pcd.reshape(-1, 3)
+     img_filtered = img.reshape(-1, 3)
+
+     save_point_cloud(pcd_filtered, img_filtered, pcd_ply, binary=False)
+     # ply_to_obj(pcd_ply, pcd_obj)
+
+     # downsample the point cloud for visualization
+     num_samples = 250000
+     if pcd_filtered.shape[0] > num_samples:
+         indices = np.random.choice(pcd_filtered.shape[0], num_samples, replace=False)
+         pcd_downsampled = pcd_filtered[indices]
+         img_downsampled = img_filtered[indices]
+     else:
+         pcd_downsampled = pcd_filtered
+         img_downsampled = img_filtered
+
+     # plotly show
+     color_str = np.array([f"rgb({r},{g},{b})" for b,g,r in img_downsampled])
+     data = [go.Scatter3d(
+         x=pcd_downsampled[:,0],
+         y=pcd_downsampled[:,1],
+         z=pcd_downsampled[:,2],
+         mode='markers',
+         marker=dict(
+             size=1,
+             color=color_str,
+             opacity=0.8,
+         )
+     )]
+     layout = go.Layout(
+         margin=dict(l=0, r=0, b=0, t=0),
+         scene=dict(
+             camera=dict(
+                 eye=dict(x=0, y=0, z=-1),
+                 up=dict(x=0, y=-1, z=0)
+             ),
+             xaxis=dict(showgrid=False, showticklabels=False, visible=False),
+             yaxis=dict(showgrid=False, showticklabels=False, visible=False),
+             zaxis=dict(showgrid=False, showticklabels=False, visible=False),
+         )
+     )
+     fig = go.Figure(data=data, layout=layout)
+
+     return fig, pcd_ply, "Success!"

  title = "Metric3D"
+ description = '''# Metric3Dv2: A versatile monocular geometric foundation model for zero-shot metric depth and surface normal estimation
+ Gradio demo for Metric3D v1/v2, which takes a single image and computes metric depth and surface normals. To use it, simply upload your image, or click one of the examples to load it. Learn more from our paper linked below.'''
+ article = "<p style='text-align: center'><a href='https://arxiv.org/pdf/2307.10984.pdf'>Metric3D arXiv</a> | <a href='https://arxiv.org/abs/2404.15506'>Metric3Dv2 arXiv</a> | <a href='https://github.com/YvanYin/Metric3D'>Github Repo</a></p>"
+
+ custom_css = '''#button1, #button2 {
+     width: 20px;
+ }'''

  examples = [
+     #["turtle.jpg"],
+     #["lions.jpg"]
+     #["files/gundam.jpg"],
+     "files/p50_pro.jpg",
+     "files/iphone13.JPG",
+     "files/canon_cat.JPG",
+     "files/canon_dog.JPG",
+     "files/museum.jpg",
+     "files/terra.jpg",
+     "files/underwater.jpg",
+     "files/venue.jpg",
  ]

+ with gr.Blocks(title=title, css=custom_css) as demo:
+     gr.Markdown(description + article)
+
+     # input and control components
+     with gr.Row():
+         with gr.Column():
+             image_input = gr.Image(type='pil', label="Original Image")
+             _ = gr.Examples(examples=examples, inputs=[image_input])
+         with gr.Column():
+             model_dropdown = gr.Dropdown(["vit-small", "vit-large"], label="Model", value="vit-large")
+
+             with gr.Accordion('Advanced options (beta)', open=True):
+                 with gr.Row():
+                     sensor_width = gr.Number(None, label="Sensor Width in mm", precision=2)
+                     sensor_height = gr.Number(None, label="Sensor Height in mm", precision=2)
+                     focal_len = gr.Number(None, label="Focal Length in mm", precision=2)
+                     camera_detector = gr.Button("Detect Camera from EXIF", elem_id="#button1")
+                 with gr.Row():
+                     fx = gr.Number(1000.0, label="fx in pixels", precision=2)
+                     fy = gr.Number(1000.0, label="fy in pixels", precision=2)
+                     focal_detector = gr.Button("Calculate Intrinsic", elem_id="#button2")
+
+     message_box = gr.Textbox(label="Messages")
+
+     # depth and normal
+     submit_button = gr.Button("Predict Depth and Normal")
+     with gr.Row():
+         with gr.Column():
+             depth_output = gr.Image(label="Output Depth")
+             depth_file = gr.File(label="Depth (.npy)")
+         with gr.Column():
+             normal_output = gr.Image(label="Output Normal")
+             normal_file = gr.File(label="Normal (.npy)")
+
+     # 3D reconstruction
+     reconstruct_button = gr.Button("Reconstruct 3D")
+     pcd_output = gr.Plot(label="3D Point Cloud (Sampled sparse version)")
+     pcd_ply = gr.File(label="3D Point Cloud (.ply)")
+
+     # cache for depth, normal maps and other states
+     state_cache = gr.State({})
+
+     # detect focal length in pixels
+     camera_detector.click(fn=get_camera, inputs=[image_input], outputs=[sensor_width, sensor_height, focal_len, message_box])
+     focal_detector.click(fn=get_intrinsic, inputs=[image_input, sensor_width, sensor_height, focal_len], outputs=[fx, fy, message_box])
+
+     submit_button.click(fn=predict_depth_normal, inputs=[image_input, model_dropdown, fx, fy, state_cache], outputs=[depth_output, depth_file, normal_output, normal_file, state_cache, message_box])
+     reconstruct_button.click(fn=unprojection_pcd, inputs=[state_cache], outputs=[pcd_output, pcd_ply, message_box])
+
+ demo.launch()
+
+
+ # iface = gr.Interface(
+ #     depth_normal,
+ #     inputs=[
+ #         gr.Image(type='pil', label="Original Image"),
+ #         gr.Dropdown(["vit-small", "vit-large"], label="Model", info="Select a model type", value="vit-large")
+ #     ],
+ #     outputs=[
+ #         gr.Image(type="pil", label="Output Depth"),
+ #         gr.Image(type="pil", label="Output Normal"),
+ #         gr.Textbox(label="Messages")
+ #     ],
+ #     title=title,
+ #     description=description,
+ #     article=article,
+ #     examples=examples,
+ #     analytics_enabled=False
+ # )
+
+ # iface.launch()
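
Note on the new intrinsics helpers above: `get_camera` reads the EXIF focal-plane resolution tags (pixels per inch) to recover the sensor size in mm, and `get_intrinsic` converts the focal length to pixels with the pinhole relation fx = image_width_px / sensor_width_mm * focal_length_mm. The following is a minimal, self-contained sketch of that arithmetic; the function and variable names are illustrative only, not part of the repo.

# Hypothetical standalone sketch of the EXIF-to-intrinsics math; names are not from Metric3D.
def focal_px_from_exif(img_w_px, img_h_px, focal_mm, plane_x_res_ppi, plane_y_res_ppi):
    # FocalPlane*Resolution is pixels per inch, so sensor size in mm = pixels / ppi * 25.4
    sensor_w_mm = img_w_px / plane_x_res_ppi * 25.4
    sensor_h_mm = img_h_px / plane_y_res_ppi * 25.4
    # pinhole model: fx = W * f / sensor_width, fy = H * f / sensor_height
    fx = img_w_px / sensor_w_mm * focal_mm
    fy = img_h_px / sensor_h_mm * focal_mm
    return fx, fy

# e.g. a 4000x3000 photo with a 5 mm lens on a 6.4x4.8 mm sensor gives fx = fy = 3125 px
print(focal_px_from_exif(4000, 3000, 5.0, 4000 / 6.4 * 25.4, 3000 / 4.8 * 25.4))

If either EXIF tag is missing, the demo falls back to a 16 mm sensor width or to the default fx = fy = 1000.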
files/canon_cat.JPG ADDED

Git LFS Details

  • SHA256: 981f5a5c39f43e5c0f5ef18c6b470fc467d538b63949e71c60ba4a8a32adc8d7
  • Pointer size: 132 Bytes
  • Size of remote file: 3.11 MB
files/canon_dog.JPG ADDED

Git LFS Details

  • SHA256: aa4a127b7df54ca80b59460b45624b9f1643c2faec7f2085510aaa01c5627f8d
  • Pointer size: 132 Bytes
  • Size of remote file: 3.21 MB
files/iphone13.JPG ADDED

Git LFS Details

  • SHA256: 415b3684d4bd411b714815de740d02c8e1ff77d054494895f1c329fa4d69e9b9
  • Pointer size: 132 Bytes
  • Size of remote file: 3.66 MB
files/p50_pro.jpg ADDED

Git LFS Details

  • SHA256: 0d093032a981c08e945fcd4b9ed928674a7966246a24e995d0a6b0dfd3ed082d
  • Pointer size: 132 Bytes
  • Size of remote file: 4.6 MB
mono/utils/do_test.py CHANGED
@@ -154,9 +154,10 @@ def get_prediction(
  )
  #pred_depth, confidence, output_dict = model.module.inference(data)
  pred_depth, confidence, output_dict = model.inference(data)
- pred_depth = pred_depth
  pred_depth = pred_depth.squeeze()
  pred_depth = pred_depth[pad_info[0] : pred_depth.shape[0] - pad_info[1], pad_info[2] : pred_depth.shape[1] - pad_info[3]]
  if gt_depth is not None:
      resize_shape = gt_depth.shape
  elif ori_shape != []:
@@ -172,7 +173,7 @@ def get_prediction(
      pred_depth_scale = None
      scale = None

-     return pred_depth, pred_depth_scale, scale, output_dict

  def transform_test_data_scalecano(rgb, intrinsic, data_basic):
      """
  )
  #pred_depth, confidence, output_dict = model.module.inference(data)
  pred_depth, confidence, output_dict = model.inference(data)
  pred_depth = pred_depth.squeeze()
  pred_depth = pred_depth[pad_info[0] : pred_depth.shape[0] - pad_info[1], pad_info[2] : pred_depth.shape[1] - pad_info[3]]
+ confidence = confidence.squeeze()
+ confidence = confidence[pad_info[0] : confidence.shape[0] - pad_info[1], pad_info[2] : confidence.shape[1] - pad_info[3]]
  if gt_depth is not None:
      resize_shape = gt_depth.shape
  elif ori_shape != []:

      pred_depth_scale = None
      scale = None

+     return pred_depth, pred_depth_scale, scale, output_dict, confidence

  def transform_test_data_scalecano(rgb, intrinsic, data_basic):
      """
mono/utils/unproj_pcd.py CHANGED
@@ -3,6 +3,7 @@ import torch
  from plyfile import PlyData, PlyElement
  import cv2


  def get_pcd_base(H, W, u0, v0, fx, fy):
      x_row = np.arange(0, W)
@@ -85,4 +86,32 @@ def save_point_cloud(pcd, rgb, filename, binary=True):
          'property uchar blue\n' \
          'end_header' % r.shape[0]
      # ---- Save ply data to disk
-     np.savetxt(filename, np.column_stack[x, y, z, r, g, b], fmt='%f %f %f %d %d %d', header=ply_head, comments='')

  from plyfile import PlyData, PlyElement
  import cv2

+ import trimesh

  def get_pcd_base(H, W, u0, v0, fx, fy):
      x_row = np.arange(0, W)
          'property uchar blue\n' \
          'end_header' % r.shape[0]
      # ---- Save ply data to disk
+     np.savetxt(filename, np.column_stack([x, y, z, r, g, b]), fmt='%f %f %f %d %d %d', header=ply_head, comments='')
+
+ def ply_to_obj(ply_file, obj_file):
+     mesh = trimesh.load_mesh(ply_file)
+     # T2 = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
+     # mesh.apply_transform(T2)
+     mesh.export(obj_file)
+
+
+ # import numpy as np
+
+ # def save_point_cloud_to_obj(points, colors, file_name):
+ #     """
+ #     Save a numpy array of point cloud data with color to an OBJ file.
+
+ #     Args:
+ #         points (np.ndarray): A numpy array of shape (H, W, 3) where H is height, W is width.
+ #         colors (np.ndarray): A numpy array of color data, shape (H, W, 3), values should be in [0, 1].
+ #         file_name (str): The path to the output .obj file.
+ #     """
+ #     H, W, _ = points.shape
+ #     assert points.shape == colors.shape, "Points and colors must have the same shape"
+
+ #     with open(file_name, 'w') as file:
+ #         for i in range(H):
+ #             for j in range(W):
+ #                 x, y, z = points[i, j]
+ #                 r, g, b = colors[i, j]
+ #                 file.write(f"v {x} {y} {z} {r} {g} {b}\n")
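
For context on `reconstruct_pcd`/`get_pcd_base` used above: each pixel (u, v) with metric depth z is back-projected with the standard pinhole relations X = (u - u0) * z / fx, Y = (v - v0) * z / fy, Z = z. Below is a small NumPy sketch of that mapping, a simplified stand-in assuming the same pinhole convention, not the repo's exact implementation.

import numpy as np

def unproject_depth(depth, fx, fy, u0, v0):
    """Back-project an (H, W) metric depth map into an (H, W, 3) point cloud (illustrative helper)."""
    H, W = depth.shape
    v, u = np.meshgrid(np.arange(H), np.arange(W), indexing='ij')  # pixel coordinates
    x = (u - u0) / fx * depth
    y = (v - v0) / fy * depth
    return np.stack([x, y, depth], axis=-1)

# e.g. pcd = unproject_depth(depth_map, fx, fy, depth_map.shape[1] / 2, depth_map.shape[0] / 2)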
requirements.txt CHANGED
@@ -19,4 +19,6 @@ datetime
  gradio==4.11.0
  gradio-imageslider==0.0.16
  cupy-cuda12x
- plotly

  gradio==4.11.0
  gradio-imageslider==0.0.16
  cupy-cuda12x
+ plotly
+ trimesh
+ exifread