"""Gradio demo app for Stereo Anything (stereo pair in, depth image out)."""
import os

import cv2
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image

# Height caps for the image widgets, keyed by the elem_id values used below.
css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 80vh;
}
#img-display-output {
    max-height: 80vh;
}
"""

# Device that input tensors are moved to before inference.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

title = "# Stereo Anything"
description = (
    "Official demo for **Stereo Anything: Unifying Stereo Matching with "
    "Large-Scale Mixed Data**. "
    "Please refer to our [paper](https://arxiv.org/abs/2411.14053), "
    "[github](https://github.com/XiandaGuo/OpenStereo/) for more details."
)


@spaces.GPU
@torch.no_grad()
def predict_depth(model, image):
    """Run ``model`` on ``image`` with gradient tracking disabled.

    Args:
        model: Callable that maps the input to a prediction.
        image: Input forwarded unchanged to ``model``.

    Returns:
        Whatever ``model(image)`` returns.
    """
    return model(image)


with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Depth Prediction demo")
    gr.Markdown("You can slide the output to compare the depth prediction with input image")

    with gr.Row():
        left_image = gr.Image(label="Left Image", type='numpy', elem_id='img-display-input')
        right_image = gr.Image(label="Right Image", type='numpy', elem_id='img-display-input')
        # The result widget uses the dedicated output rule declared in `css`.
        depth_image = gr.Image(label="Depth Image", type='numpy', elem_id='img-display-output')
    # TODO: optionally expose the raw prediction for download, e.g.
    # gr.File(label="16-bit raw depth (can be considered as disparity)").
    submit = gr.Button("Submit")

    def on_submit(left_image, right_image):
        """Build the batched stereo sample for the model from the two uploads.

        Args:
            left_image: Left view as a numpy array (the Image components are
                declared with ``type='numpy'``), or None if nothing was
                uploaded.
            right_image: Right view, same conventions as ``left_image``.

        Returns:
            A one-element list holding the value for the depth image output.
            It is currently ``[None]`` because no model is wired in yet.

        Raises:
            gr.Error: If either image is missing.
        """
        if left_image is None or right_image is None:
            # Surface a readable message in the UI instead of an AttributeError.
            raise gr.Error("Please provide both a left and a right image.")

        # numpy arrays have no ``unsqueeze``; convert to tensors first, then
        # add the leading batch dimension.
        # NOTE(review): channel layout / dtype expected by the model is not
        # visible here — confirm whether a permute or float cast is needed.
        sample = {
            'left': torch.from_numpy(left_image).unsqueeze(0),
            'right': torch.from_numpy(right_image).unsqueeze(0),
        }
        # Use the module-level DEVICE so CPU-only hosts work too (the previous
        # hard-coded device index 0 requires CUDA).
        sample = {key: tensor.to(DEVICE) for key, tensor in sample.items()}

        # TODO: load the model, call ``model.eval()`` and run
        # ``model_pred = model(sample)``; until then there is nothing to show.
        model_pred = None
        return [model_pred]

    submit.click(on_submit, inputs=[left_image, right_image], outputs=[depth_image])


if __name__ == '__main__':
    demo.queue().launch()