import gradio as gr
import cv2
import numpy as np
import os
from PIL import Image
import spaces
import torch

# Headline and introductory blurb, rendered as Markdown at the top of the page.
title = "# Stereo Anything"
description = (
    "Official demo for **Stereo Anything: Unifying Stereo Matching with Large-Scale Mixed Data**.\n"
    "Please refer to our [paper](https://arxiv.org/abs/2411.14053), "
    "[github](https://github.com/XiandaGuo/OpenStereo/) for more details."
)

# Torch device string: 'cuda' when torch reports a usable GPU, otherwise 'cpu'.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# Stylesheet handed to gr.Blocks; caps element heights relative to the viewport.
css = """
#img-display-container {
    max-height: 100vh;
    }
#img-display-input {
    max-height: 80vh;
    }
#img-display-output {
    max-height: 80vh;
    }
"""

@spaces.GPU
@torch.no_grad()
def predict_depth(model, image):
    """Call ``model`` on ``image`` and return the model's output unchanged.

    The call runs with gradient tracking disabled (``torch.no_grad``) and is
    wrapped by ``spaces.GPU``.

    Args:
        model: Callable invoked as ``model(image)``.
        image: Input forwarded to ``model`` as its only argument.

    Returns:
        Whatever ``model(image)`` returns.
    """
    prediction = model(image)
    return prediction


with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Depth Prediction demo")
    gr.Markdown("You can slide the output to compare the depth prediction with input image")

    with gr.Row():
        left_image = gr.Image(label="Left Image", type='numpy', elem_id='img-display-input')
        right_image = gr.Image(label="Right Image", type='numpy', elem_id='img-display-input')
        # The result panel gets its own id so the `#img-display-output` rule in
        # `css` applies to it instead of sharing the input panels' id.
        depth_image = gr.Image(label="Depth Image", type='numpy', elem_id='img-display-output')
    # raw_file = gr.File(label="16-bit raw depth (can be considered as disparity)")
    submit = gr.Button("Submit")

    def on_submit(left_image, right_image):
        """Prepare the stereo pair for inference and return the depth output.

        Args:
            left_image: Left view from the "Left Image" component; a numpy
                array because the component uses ``type='numpy'``, or ``None``
                when nothing was provided.
            right_image: Right view, same conventions as ``left_image``.

        Returns:
            A one-element list holding the value for ``depth_image``. No model
            is wired in yet, so the value is currently always ``None``.

        Raises:
            gr.Error: If either image is missing.
        """
        if left_image is None or right_image is None:
            raise gr.Error("Please provide both a left and a right image.")

        # numpy arrays have no `.unsqueeze`, so convert to tensors before adding
        # the leading batch dimension. Use DEVICE rather than a hard-coded CUDA
        # index so this also works on CPU-only hosts.
        # NOTE(review): the array layout/dtype is passed through unchanged;
        # confirm what the model expects (e.g. channel order, float scaling).
        sample = {
            'left': torch.as_tensor(left_image).unsqueeze(0).to(DEVICE),
            'right': torch.as_tensor(right_image).unsqueeze(0).to(DEVICE),
        }

        # TODO: no model is loaded in this file yet. Once one is available, put
        # it in eval mode and run it on `sample` here (the original placeholder
        # was `model(sample)`).
        model_pred = None

        return [model_pred]

    submit.click(on_submit, inputs=[left_image, right_image], outputs=[depth_image])


if __name__ == '__main__':
    # Script entry point: enable request queuing on the demo, then launch it.
    queued_demo = demo.queue()
    queued_demo.launch()