# Hugging Face Spaces page header (extraction residue): "Spaces: Running on Zero"
import cv2 | |
import torch | |
import numpy as np | |
import gradio as gr | |
from depth_anything_v2.dpt import DepthAnythingV2 | |
# Constructor keyword arguments for DepthAnythingV2, keyed by encoder name.
model_configs = {
    'vits': {
        'encoder': 'vits',
        'features': 64,
        'out_channels': [48, 96, 192, 384],
    },
    'vitb': {
        'encoder': 'vitb',
        'features': 128,
        'out_channels': [96, 192, 384, 768],
    },
    'vitl': {
        'encoder': 'vitl',
        'features': 256,
        'out_channels': [256, 512, 1024, 1024],
    },
}
def initialize_model():
    """Build the DepthAnythingV2 network and load its weights from disk.

    The 'vitl' entry of ``model_configs`` is used together with
    ``max_depth=1``. Weights are read from ``checkpoints/model2.pth`` and
    mapped onto the CPU.

    Returns:
        The model with the checkpoint weights loaded, switched to eval mode.
    """
    encoder = 'vitl'
    max_depth = 1
    model = DepthAnythingV2(**{**model_configs[encoder], 'max_depth': max_depth})

    # NOTE(security): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    checkpoint = torch.load('checkpoints/model2.pth', map_location='cpu')

    # Training checkpoints wrap the weights next to bookkeeping entries; skip
    # those entries and keep the (last) dict-valued one as the state dict.
    # Requiring a dict avoids picking up a stray scalar or tensor entry.
    metadata_keys = ('optimizer', 'epoch', 'previous_best')
    state_dict = None
    for key, value in checkpoint.items():
        if key not in metadata_keys and isinstance(value, dict):
            state_dict = value

    if state_dict is None:
        # No nested dict found: treat the checkpoint itself as a flat
        # state dict (parameter name -> tensor).
        state_dict = {
            key: value
            for key, value in checkpoint.items()
            if key not in metadata_keys
        }

    # Drop the 'module.' marker from parameter names (presumably left by a
    # DataParallel/DDP wrapper at save time) so they match the bare model.
    cleaned_state_dict = {
        name.replace('module.', ''): tensor
        for name, tensor in state_dict.items()
    }

    model.load_state_dict(cleaned_state_dict)
    model.eval()
    return model
# Build the model once at import time; process_image reuses this instance.
MODEL = initialize_model()
def process_image(input_image):
    """Estimate depth for an RGB image and render it two ways.

    Args:
        input_image: RGB image in any form ``np.array`` accepts, or ``None``
            when nothing was supplied.

    Returns:
        A ``(depth_normalized, depth_colormap)`` pair: the depth map rescaled
        to 0-255 as ``uint8``, and the same map rendered with the INFERNO
        colormap in RGB channel order. ``(None, None)`` when ``input_image``
        is ``None``.
    """
    if input_image is None:
        return None, None

    # Convert RGB -> BGR, since the original code fed the model images read
    # with cv2.imread (which loads BGR).
    bgr_image = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)

    # Assumes infer_image returns a NumPy array of depth values — TODO confirm.
    depth = MODEL.infer_image(bgr_image)

    # Normalize depth to 0-255 for visualization. A constant depth map has
    # zero range; emit all zeros instead of dividing by zero.
    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range > 0:
        depth_normalized = ((depth - depth_min) / depth_range * 255).astype(np.uint8)
    else:
        depth_normalized = np.zeros_like(depth, dtype=np.uint8)

    # applyColorMap produces BGR; convert back to RGB for Gradio.
    depth_colormap = cv2.applyColorMap(depth_normalized, cv2.COLORMAP_INFERNO)
    depth_colormap = cv2.cvtColor(depth_colormap, cv2.COLOR_BGR2RGB)
    return depth_normalized, depth_colormap
# Gradio callback
def gradio_interface(input_img):
    """Return the input image followed by its raw and colored depth maps.

    The result is a three-item list: ``input_img`` unchanged, then the two
    values produced by ``process_image(input_img)``.
    """
    return [input_img, *process_image(input_img)]
# Gradio UI: one image in; the original image plus both depth renderings out.
iface = gr.Interface(
    fn=gradio_interface,
    title="Depth Estimation",
    description="Upload an image to generate its depth map.",
    inputs=gr.Image(label="Input Image"),
    outputs=[
        gr.Image(label=caption)
        for caption in ("Original Image", "Raw Depth Map", "Colored Depth Map")
    ],
    examples=["image.jpg"],  # Add example images here
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()