supersolar committed on
Commit
2c27418
·
verified ·
1 Parent(s): f8a8adc

Update infer.py

Browse files
Files changed (1) hide show
  1. infer.py +10 -39
infer.py CHANGED
@@ -65,25 +65,17 @@ def infer_pipe(pipe, image_input, task_name, seed, device):
65
  def lotus_video(input_video, task_name, seed, device):
66
  if task_name == 'depth':
67
  model_g = 'jingheya/lotus-depth-g-v1-0'
68
- model_d = 'jingheya/lotus-depth-d-v1-0'
69
  else:
70
  model_g = 'jingheya/lotus-normal-g-v1-0'
71
- model_d = 'jingheya/lotus-normal-d-v1-0'
72
 
73
  dtype = torch.float16
74
  pipe_g = LotusGPipeline.from_pretrained(
75
  model_g,
76
  torch_dtype=dtype,
77
  )
78
- pipe_d = LotusDPipeline.from_pretrained(
79
- model_d,
80
- torch_dtype=dtype,
81
- )
82
  pipe_g.to(device)
83
- pipe_d.to(device)
84
  pipe_g.set_progress_bar_config(disable=True)
85
- pipe_d.set_progress_bar_config(disable=True)
86
- logging.info(f"Successfully loading pipeline from {model_g} and {model_d}.")
87
 
88
  # load the video and split it into frames
89
  cap = cv2.VideoCapture(input_video)
@@ -105,7 +97,6 @@ def lotus_video(input_video, task_name, seed, device):
105
  task_emb = torch.cat([torch.sin(task_emb), torch.cos(task_emb)], dim=-1).repeat(1, 1)
106
 
107
  output_g = []
108
- output_d = []
109
  for frame in frames:
110
  if torch.backends.mps.is_available():
111
  autocast_ctx = nullcontext()
@@ -129,59 +120,39 @@ def lotus_video(input_video, task_name, seed, device):
129
  timesteps=[999],
130
  task_emb=task_emb,
131
  ).images[0]
132
- pred_d = pipe_d(
133
- rgb_in=test_image,
134
- prompt='',
135
- num_inference_steps=1,
136
- generator=generator,
137
- # guidance_scale=0,
138
- output_type='np',
139
- timesteps=[999],
140
- task_emb=task_emb,
141
- ).images[0]
142
-
143
  # Post-process the prediction
144
  if task_name == 'depth':
145
  output_npy_g = pred_g.mean(axis=-1)
146
  output_color_g = colorize_depth_map(output_npy_g)
147
- output_npy_d = pred_d.mean(axis=-1)
148
- output_color_d = colorize_depth_map(output_npy_d)
149
  else:
150
  output_npy_g = pred_g
151
  output_color_g = Image.fromarray((output_npy_g * 255).astype(np.uint8))
152
- output_npy_d = pred_d
153
- output_color_d = Image.fromarray((output_npy_d * 255).astype(np.uint8))
154
-
155
  output_g.append(output_color_g)
156
- output_d.append(output_color_d)
157
 
158
- return output_g, output_d
159
 
160
  def lotus(image_input, task_name, seed, device):
161
  if task_name == 'depth':
162
  model_g = 'jingheya/lotus-depth-g-v1-0'
163
- model_d = 'jingheya/lotus-depth-d-v1-1'
164
  else:
165
  model_g = 'jingheya/lotus-normal-g-v1-0'
166
- model_d = 'jingheya/lotus-normal-d-v1-0'
167
 
168
  dtype = torch.float16
169
  pipe_g = LotusGPipeline.from_pretrained(
170
  model_g,
171
  torch_dtype=dtype,
172
  )
173
- pipe_d = LotusDPipeline.from_pretrained(
174
- model_d,
175
- torch_dtype=dtype,
176
- )
177
  pipe_g.to(device)
178
- pipe_d.to(device)
179
  pipe_g.set_progress_bar_config(disable=True)
180
- pipe_d.set_progress_bar_config(disable=True)
181
- logging.info(f"Successfully loading pipeline from {model_g} and {model_d}.")
182
  output_g = infer_pipe(pipe_g, image_input, task_name, seed, device)
183
- output_d = infer_pipe(pipe_d, image_input, task_name, seed, device)
184
- return output_g, output_d
185
 
186
  def parse_args():
187
  '''Set the Args'''
 
65
  def lotus_video(input_video, task_name, seed, device):
66
  if task_name == 'depth':
67
  model_g = 'jingheya/lotus-depth-g-v1-0'
 
68
  else:
69
  model_g = 'jingheya/lotus-normal-g-v1-0'
 
70
 
71
  dtype = torch.float16
72
  pipe_g = LotusGPipeline.from_pretrained(
73
  model_g,
74
  torch_dtype=dtype,
75
  )
 
 
 
 
76
  pipe_g.to(device)
 
77
  pipe_g.set_progress_bar_config(disable=True)
78
+ logging.info(f"Successfully loading pipeline from {model_g}.")
 
79
 
80
  # load the video and split it into frames
81
  cap = cv2.VideoCapture(input_video)
 
97
  task_emb = torch.cat([torch.sin(task_emb), torch.cos(task_emb)], dim=-1).repeat(1, 1)
98
 
99
  output_g = []
 
100
  for frame in frames:
101
  if torch.backends.mps.is_available():
102
  autocast_ctx = nullcontext()
 
120
  timesteps=[999],
121
  task_emb=task_emb,
122
  ).images[0]
 
 
 
 
 
 
 
 
 
 
 
123
  # Post-process the prediction
124
  if task_name == 'depth':
125
  output_npy_g = pred_g.mean(axis=-1)
126
  output_color_g = colorize_depth_map(output_npy_g)
 
 
127
  else:
128
  output_npy_g = pred_g
129
  output_color_g = Image.fromarray((output_npy_g * 255).astype(np.uint8))
130
+
 
 
131
  output_g.append(output_color_g)
132
+
133
 
134
+ return output_g
135
 
136
  def lotus(image_input, task_name, seed, device):
137
  if task_name == 'depth':
138
  model_g = 'jingheya/lotus-depth-g-v1-0'
 
139
  else:
140
  model_g = 'jingheya/lotus-normal-g-v1-0'
 
141
 
142
  dtype = torch.float16
143
  pipe_g = LotusGPipeline.from_pretrained(
144
  model_g,
145
  torch_dtype=dtype,
146
  )
147
+
 
 
 
148
  pipe_g.to(device)
149
+
150
  pipe_g.set_progress_bar_config(disable=True)
151
+
152
+ logging.info(f"Successfully loading pipeline from {model_g}.")
153
  output_g = infer_pipe(pipe_g, image_input, task_name, seed, device)
154
+
155
+ return output_g
156
 
157
  def parse_args():
158
  '''Set the Args'''