# PBRGeneration / app.py
# Gradio demo that generates depth maps from uploaded images using DepthAnythingV2.
import cv2
import torch
import numpy as np
import gradio as gr
from depth_anything_v2.dpt import DepthAnythingV2
# Encoder presets for DepthAnythingV2: (name, feature width, decoder channels).
_ENCODER_PRESETS = [
    ('vits', 64, [48, 96, 192, 384]),
    ('vitb', 128, [96, 192, 384, 768]),
    ('vitl', 256, [256, 512, 1024, 1024]),
]

# Keyed by encoder name so initialize_model() can splat a preset straight
# into the DepthAnythingV2 constructor.
model_configs = {
    name: {'encoder': name, 'features': features, 'out_channels': channels}
    for name, features, channels in _ENCODER_PRESETS
}
def initialize_model():
    """Build the ViT-L DepthAnythingV2 model and load its fine-tuned weights.

    Loads ``checkpoints/model2.pth`` on CPU, unwraps training-checkpoint
    metadata, strips any DataParallel ``module.`` prefix from parameter
    names, and returns the model in eval mode.

    Returns:
        The DepthAnythingV2 model, ready for inference.
    """
    encoder = 'vitl'
    max_depth = 1  # presumably depth head trained on [0, 1] range — TODO confirm
    model = DepthAnythingV2(**{**model_configs[encoder], 'max_depth': max_depth})

    checkpoint = torch.load('checkpoints/model2.pth', map_location='cpu')

    # Training checkpoints bundle weights with optimizer/bookkeeping entries.
    # BUG FIX: the old loop reassigned `state_dict` on every non-excluded key,
    # silently keeping only the LAST one (and picking a single tensor when the
    # checkpoint was already a bare state dict). Select the weights explicitly.
    if 'model' in checkpoint:
        state_dict = checkpoint['model']
    else:
        candidates = [
            key for key in checkpoint
            if key not in ('optimizer', 'epoch', 'previous_best')
        ]
        if len(candidates) == 1 and isinstance(checkpoint[candidates[0]], dict):
            # Single wrapper entry holding the weights (original happy path).
            state_dict = checkpoint[candidates[0]]
        else:
            # Checkpoint is already a bare state dict (keys are parameter names).
            state_dict = checkpoint

    # DataParallel checkpoints prefix every parameter name with 'module.'.
    my_state_dict = {
        key.replace('module.', '', 1): value
        for key, value in state_dict.items()
    }
    model.load_state_dict(my_state_dict)
    model.eval()
    return model
# Initialize model globally: loaded once at import time so every Gradio
# request reuses the same weights instead of reloading the checkpoint.
MODEL = initialize_model()
def process_image(input_image):
    """Run depth estimation on an RGB input image.

    Args:
        input_image: RGB image from Gradio (PIL image or HxWx3 array),
            or None when nothing was uploaded.

    Returns:
        Tuple of (uint8 normalized depth map, RGB colormapped depth map),
        or (None, None) when input_image is None.
    """
    if input_image is None:
        return None, None

    # Convert RGB -> BGR: the model pipeline was written against cv2.imread,
    # which loads BGR.
    # BUG FIX: cv2.COLOR_RGB_BGR does not exist (AttributeError at runtime);
    # the correct conversion flag is cv2.COLOR_RGB2BGR.
    input_image = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)

    # Get depth map
    depth = MODEL.infer_image(input_image)

    # Normalize depth to 0-255 for visualization, guarding against a
    # constant depth map (zero range would divide by zero).
    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range == 0:
        depth_normalized = np.zeros_like(depth, dtype=np.uint8)
    else:
        depth_normalized = ((depth - depth_min) / depth_range * 255).astype(np.uint8)

    # Apply colormap for better visualization
    depth_colormap = cv2.applyColorMap(depth_normalized, cv2.COLORMAP_INFERNO)
    depth_colormap = cv2.cvtColor(depth_colormap, cv2.COLOR_BGR2RGB)  # back to RGB for Gradio
    return depth_normalized, depth_colormap
# Create Gradio interface
def gradio_interface(input_img):
    """Gradio callback: return the original image plus raw and colored depth maps."""
    raw_map, colored_map = process_image(input_img)
    return [input_img, raw_map, colored_map]
# Define interface: one image in, three images out (original, raw depth, colored depth).
_output_components = [
    gr.Image(label="Original Image"),
    gr.Image(label="Raw Depth Map"),
    gr.Image(label="Colored Depth Map"),
]
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Image(label="Input Image"),
    outputs=_output_components,
    title="Depth Estimation",
    description="Upload an image to generate its depth map.",
    examples=["image.jpg"],  # example image expected alongside the app
)
# Launch the app only when executed directly (not when imported as a module).
if __name__ == "__main__":
    iface.launch()