import json
import gradio as gr
import yolov5
from PIL import Image
from huggingface_hub import hf_hub_download
import os 
import cv2 


# Title passed to both Gradio interfaces.
app_title = "Detect san pham VSK"

# Hugging Face Hub repositories offered in the image tab's model dropdown.
models_ids = [
    'linhcuem/gold_yolov5m',
    'linhcuem/yolov5m_chamdiem_raw13',
    'linhcuem/yolov5m_cham_diemraw15',
    'linhcuem/yolov5m6_raw17_yaml',
    'linhcuem/yolov5m_chamdiem_ver1',
    'linhcuem/cham_diemraw16',
    'linhcuem/yolov5m_chamdiem_ver2',
    'linhcuem/yolov5m6_cham_diemraw17',
    'linhcuem/yolov5m_chamdiem_ver7',
    'linhcuem/yolov5m_chamdiem_ver8',
    'linhcuem/yolov5m_chamdiem_ver10',
    'linhcuem/yolov5_chamdiem_ver9',
    'linhcuem/yolo5m_chamdiem_ver11',
    'linhcuem/yolov5_chamdiem_ver12',
    'linhcuem/yolov5_chamdiem_ver15_300epochs',
    'linhcuem/yolov5_chamdiem_ver15',
    'linhcuem/yolov5_chamdiem_ver13',
    'linhcuem/yolov5_chamdiem_ver17',
    'linhcuem/yolov5_chamdiem_ver16',
    'linhcuem/yolov5_chamdiem_ver18',
]

# The last entry of the list is the model loaded at start-up.
current_model_id = models_ids[-1]
model = yolov5.load(current_model_id)

# Images used for the example rows of the image tab.
# NOTE(review): "_7.jpg" and "_13.jpg" are listed twice; kept as-is.
_example_images = [
    'test_images/yen thien viet_4.jpg',
    'test_images/yen thien viet_6.jpg',
    'test_images/yen thien viet_7.jpg',
    'test_images/yen thien viet_7.jpg',
    'test_images/yen thien viet_8.jpg',
    'test_images/yen thien viet_9.jpg',
    'test_images/yen thien viet_94.jpg',
    'test_images/yen thien viet_13.jpg',
    'test_images/yen thien viet_16.jpg',
    'test_images/yen thien viet_19.jpg',
    'test_images/yen thien viet_13.jpg',
]

# One row per image: [image path, confidence threshold, model id].
examples = [
    [image_path, 0.25, 'linhcuem/gold_yolov5m']
    for image_path in _example_images
]
# Input sizes already read from a model's config.json, keyed by model id, so
# repeated predictions with the same model do not query the Hub again.
_input_size_cache = {}


def _get_input_size(model_id):
    """Return the ``input_size`` value from *model_id*'s ``config.json``."""
    if model_id not in _input_size_cache:
        config_path = hf_hub_download(repo_id=model_id, filename="config.json")
        with open(config_path, "r", encoding="utf-8") as f:
            _input_size_cache[model_id] = json.load(f)["input_size"]
    return _input_size_cache[model_id]


def predict(image, threshold=0.25, model_id=None):
    """Run object detection on one image and count the detections per class.

    Args:
        image: Image handed to the YOLOv5 model.
        threshold: Confidence threshold assigned to ``model.conf``.
        model_id: Hub repository id of the model to use. ``None`` keeps the
            model that is currently loaded.

    Returns:
        A tuple ``(output_image, count_result)``: the rendered detections as
        a PIL image, and the per-``name`` detection counts produced by
        pandas ``value_counts``.

    Raises:
        KeyError: If the model's ``config.json`` has no ``input_size`` entry.
    """
    global current_model_id
    global model

    # Without an explicit id there is nothing to load or download; reuse the
    # active model instead of passing None on to yolov5 / the Hub client.
    if model_id is None:
        model_id = current_model_id

    # Swap the cached model only when a different one is requested.
    if model_id != current_model_id:
        model = yolov5.load(model_id)
        current_model_id = model_id

    input_size = _get_input_size(model_id)

    # Perform inference.
    model.conf = threshold
    results = model(image, size=input_size)
    count_result = results.pandas().xyxy[0].value_counts('name')
    numpy_image = results.render()[0]
    output_image = Image.fromarray(numpy_image)
    return output_image, count_result

def show_pred_vid(
    video_path: str = None,
    model_path: str = None,
    image_size: int = 640,
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45,
):
    """Run a YOLOv5 model on every frame of a video and save the annotated copy.

    Args:
        video_path: Path of the video file to read.
        model_path: Model identifier handed to ``yolov5.load``.
        image_size: Inference size passed to the model for each frame.
        conf_threshold: Confidence threshold assigned to the model.
        iou_threshold: IoU threshold assigned to the model.

    Returns:
        Path of the annotated ``.mp4`` file, or ``None`` when no video or
        model was given or no frame could be read.
    """
    import tempfile

    if not video_path or not model_path:
        return None

    # Load and configure the model once; doing this per frame repeats the
    # same work for every frame of the video.
    detector = yolov5.load(model_path)
    detector.conf = conf_threshold
    detector.iou = iou_threshold
    detector.agnostic = False
    detector.max_det = 1000

    cap = cv2.VideoCapture(video_path)
    writer = None
    output_path = None
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # OpenCV decodes frames as BGR; the model is given RGB and the
            # rendered result is converted back to BGR for the writer.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = detector(rgb_frame, size=int(image_size))
            annotated_frame = cv2.cvtColor(results.render()[0], cv2.COLOR_RGB2BGR)

            if writer is None:
                # The writer is created lazily so the frame size comes from
                # the first annotated frame and no file is left behind when
                # the video yields no frames at all.
                height, width = annotated_frame.shape[:2]
                fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # 0.0 means unknown
                with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
                    output_path = tmp.name
                # NOTE(review): "mp4v" may not play in every browser without
                # re-encoding - confirm with the deployed Gradio/ffmpeg setup.
                writer = cv2.VideoWriter(
                    output_path,
                    cv2.VideoWriter_fourcc(*"mp4v"),
                    fps,
                    (width, height),
                )
            writer.write(annotated_frame)
    finally:
        # Release the capture and writer even when inference fails mid-video.
        cap.release()
        if writer is not None:
            writer.release()

    return output_path


# Inputs of the video tab, in the order of show_pred_vid's parameters.
inputs_vid = [
    gr.Video(label="Input Video"),
    gr.Dropdown(
        ["linhcuem/yolov5_chamdiem_ver13"],
        value="linhcuem/yolov5_chamdiem_ver13",
        label="Model",
    ),
    gr.Slider(minimum=320, maximum=1280, value=640, step=32, label="Image Size"),
    gr.Slider(minimum=0.0, maximum=1.0, value=0.25, step=0.05, label="Confidence Threshold"),
    gr.Slider(minimum=0.0, maximum=1.0, value=0.45, step=0.05, label="IOU Threshold"),
]

# show_pred_vid returns the path of a video file, so the output is a Video.
outputs_vid = gr.Video(label="Output Video")
interface_vid = gr.Interface(
    fn=show_pred_vid,
    inputs=inputs_vid,
    outputs=outputs_vid,
    title=app_title,
    description="Upload a video to run YOLOv5 detection on every frame.",
    cache_examples=False,
    theme='huggingface',
)

    
# Image tab: predict() receives the image, the confidence threshold and the
# model id, and returns the rendered image plus the per-class counts.
interface_image = gr.Interface(
    title=app_title,
    description="DO ANH DAT",
    fn=predict,
    inputs=[
        gr.Image(type="pil"),
        gr.Slider(maximum=1, step=0.01, value=0.25),
        gr.Dropdown(models_ids, value=models_ids[-1]),
    ],
    outputs=[gr.Image(type="pil"), gr.Textbox(show_label=False)],
    examples=examples,
    # Cache example outputs only when there are examples to cache.
    cache_examples=bool(examples),
)

# Serve both interfaces as tabs of a single app.
gr.TabbedInterface(
    [interface_image, interface_vid],
    tab_names=['Image inference', 'Video inference'],
).launch()