import cv2
import torch
import numpy as np
import gradio as gr

from depth_anything_v2.dpt import DepthAnythingV2

# Encoder configurations for the three DepthAnythingV2 variants.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
}


def initialize_model():
    """Build the ViT-L DepthAnythingV2 model and load fine-tuned weights.

    Returns:
        The model in eval mode, with weights loaded from
        ``checkpoints/model2.pth`` on CPU.
    """
    encoder = 'vitl'
    max_depth = 1  # metric-depth head cap; checkpoint was presumably trained with this — confirm
    model = DepthAnythingV2(**{**model_configs[encoder], 'max_depth': max_depth})

    # NOTE(review): consider torch.load(..., weights_only=True) if the
    # checkpoint contains only tensors — safer against pickle payloads.
    checkpoint = torch.load('checkpoints/model2.pth', map_location='cpu')

    # Pick the model weights out of the checkpoint. The checkpoint is assumed
    # to hold exactly one non-bookkeeping key (e.g. 'model'); if several are
    # present, the last one iterated wins (original behavior preserved).
    state_dict = {}
    for key in checkpoint.keys():
        if key not in ('optimizer', 'epoch', 'previous_best'):
            state_dict = checkpoint[key]

    # Strip any DataParallel 'module.' prefix so keys match the bare model.
    my_state_dict = {key.replace('module.', ''): value
                     for key, value in state_dict.items()}

    model.load_state_dict(my_state_dict)
    model.eval()
    return model


# Initialize model globally (once, at import time).
MODEL = initialize_model()


def process_image(input_image):
    """Run depth inference on an RGB image.

    Args:
        input_image: RGB image (as delivered by Gradio), or None.

    Returns:
        Tuple of (grayscale uint8 depth map, RGB colorized depth map),
        or (None, None) when no image was provided.
    """
    if input_image is None:
        return None, None

    # Gradio supplies RGB; the model pipeline expects BGR (as from cv2.imread).
    # BUGFIX: the constant is COLOR_RGB2BGR (COLOR_RGB_BGR does not exist).
    input_image = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)

    depth = MODEL.infer_image(input_image)

    # Normalize depth to 0-255 for visualization; guard against a flat
    # depth map, which would otherwise divide by zero.
    depth_range = depth.max() - depth.min()
    if depth_range > 0:
        depth_normalized = ((depth - depth.min()) / depth_range * 255).astype(np.uint8)
    else:
        depth_normalized = np.zeros_like(depth, dtype=np.uint8)

    # Colormap for readability; convert back to RGB for Gradio display.
    depth_colormap = cv2.applyColorMap(depth_normalized, cv2.COLORMAP_INFERNO)
    depth_colormap = cv2.cvtColor(depth_colormap, cv2.COLOR_BGR2RGB)

    return depth_normalized, depth_colormap


def gradio_interface(input_img):
    """Gradio callback: echo the input alongside both depth renderings."""
    depth_raw, depth_colored = process_image(input_img)
    return [input_img, depth_raw, depth_colored]


# Define interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Image(label="Input Image"),
    outputs=[
        gr.Image(label="Original Image"),
        gr.Image(label="Raw Depth Map"),
        gr.Image(label="Colored Depth Map"),
    ],
    title="Depth Estimation",
    description="Upload an image to generate its depth map.",
    examples=["image.jpg"],  # Add example images here
)

# Launch the app
if __name__ == "__main__":
    iface.launch()