import shutil from pathlib import Path from typing import Dict, Union, Optional import numpy as np import gradio as gr from ultralytics import YOLO from utils import ( download_model, detect_image, detect_video, get_csv_annotate, get_matplotlib_fig, ) # ======================= MODEL =================================== MODELS_DIR = Path('models') MODELS_DIR.mkdir(exist_ok=True) MODELS = { 'yolov11n.pt': 'https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt', 'yolov11s.pt': 'https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt', 'yolov11m.pt': 'https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt', 'yolov11l.pt': 'https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt', 'yolov11x.pt': 'https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt', } MODEL_NAMES = list(MODELS.keys()) model_path = download_model(MODEL_NAMES[0], MODELS_DIR, MODELS) default_model = YOLO(model_path) IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png'] VIDEO_EXTENSIONS = ['.mp4', '.avi'] # =================== ADDITIONAL INTERFACE FUNCTIONS ======================== def change_model(model_state: Dict[str, YOLO], model_name: str): progress = gr.Progress() progress(0.3, desc='Downloading the model') model_path = download_model(model_name, MODELS_DIR, MODELS) progress(0.7, desc='Model initialization') model_state['model'] = YOLO(model_path) return f'Model {model_name} initialized' def detect(file_path: str, file_link: str, model_state: Dict[str, YOLO], conf: float, iou: float): model = model_state['model'] if file_link: file_path = file_link file_ext = f'.{file_path.rsplit(".")[-1]}' if file_ext in IMAGE_EXTENSIONS: np_image = detect_image(file_path, model, conf, iou) return np_image, "Detection complete, opening image..." elif file_ext in VIDEO_EXTENSIONS or 'youtube.com' in file_link: video_path = detect_video(file_path, model, conf, iou) return video_path, "Detection complete, converting and opening video..." else: gr.Info('Invalid image or video format...') return None, None # =================== INTERFACE COMPONENTS ============================ def get_output_media_components(detect_result: Optional[Union[np.ndarray, str, Path]] = None): visible = isinstance(detect_result, np.ndarray) image_output = gr.Image( value=detect_result if visible else None, type="numpy", width=640, height=480, visible=visible, label='Output', ) visible = isinstance(detect_result, (str, Path)) video_output = gr.Video( value=detect_result if visible else None, width=640, height=480, visible=visible, label='Output', ) clear_btn = gr.Button( value='Clear', scale=0, visible=detect_result is not None, ) return image_output, video_output, clear_btn def get_download_csv_btn(csv_annotations_path: Optional[Path] = None): download_csv_btn = gr.DownloadButton( label='Download csv annotations for video', value=csv_annotations_path, scale=0, visible=csv_annotations_path is not None, ) return download_csv_btn # =================== APPINTERFACE ========================== css = '''.gradio-container {width: 70% !important}''' with gr.Blocks(css=css) as demo: with gr.Tab('Detection image / video'): gr.HTML("""