"""Gradio demo: YOLOv5 object detection on images and videos.

The app exposes two tabs:

* an image tab backed by :func:`predict`, which runs the selected model on
  one picture and reports how many objects of each class were found;
* a video tab backed by :func:`show_pred_vid`, which streams annotated
  frames of the uploaded clip.
"""
import functools
import json
import os

import cv2
import gradio as gr
import yolov5
from huggingface_hub import hf_hub_download
from PIL import Image

app_title = "Detect san pham VSK"
# The video tab referenced an undefined ``description``; define it once with
# the text the image tab already used so both tabs share it.
description = "DO ANH DAT"

models_ids = [
    'linhcuem/gold_yolov5m',
    'linhcuem/yolov5m_chamdiem_raw13',
    'linhcuem/yolov5m_cham_diemraw15',
    'linhcuem/yolov5m6_raw17_yaml',
    'linhcuem/yolov5m_chamdiem_ver1',
    'linhcuem/cham_diemraw16',
    'linhcuem/yolov5m_chamdiem_ver2',
    'linhcuem/yolov5m6_cham_diemraw17',
    'linhcuem/yolov5m_chamdiem_ver7',
    'linhcuem/yolov5m_chamdiem_ver8',
    'linhcuem/yolov5m_chamdiem_ver10',
    'linhcuem/yolov5_chamdiem_ver9',
    'linhcuem/yolo5m_chamdiem_ver11',
    'linhcuem/yolov5_chamdiem_ver12',
    'linhcuem/yolov5_chamdiem_ver15_300epochs',
    'linhcuem/yolov5_chamdiem_ver15',
    'linhcuem/yolov5_chamdiem_ver13',
    'linhcuem/yolov5_chamdiem_ver17',
    'linhcuem/yolov5_chamdiem_ver16',
    'linhcuem/yolov5_chamdiem_ver18',
]

# The most recent model is loaded eagerly; predict() swaps it on demand.
current_model_id = models_ids[-1]
model = yolov5.load(current_model_id)

# Example rows are (image path, confidence threshold, model id).
# Order and repeated entries are kept exactly as in the original list.
_EXAMPLE_IMAGES = (
    'test_images/yen thien viet_4.jpg',
    'test_images/yen thien viet_6.jpg',
    'test_images/yen thien viet_7.jpg',
    'test_images/yen thien viet_7.jpg',
    'test_images/yen thien viet_8.jpg',
    'test_images/yen thien viet_9.jpg',
    'test_images/yen thien viet_94.jpg',
    'test_images/yen thien viet_13.jpg',
    'test_images/yen thien viet_16.jpg',
    'test_images/yen thien viet_19.jpg',
    'test_images/yen thien viet_13.jpg',
)
examples = [[path, 0.25, 'linhcuem/gold_yolov5m'] for path in _EXAMPLE_IMAGES]


@functools.lru_cache(maxsize=32)
def _get_input_size(model_id):
    """Return the ``input_size`` entry of the model repo's ``config.json``.

    The result is memoised per model id so that repeated predictions do not
    hit the Hub and re-parse the file on every request.
    """
    config_path = hf_hub_download(repo_id=model_id, filename="config.json")
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    return config["input_size"]


def predict(image, threshold=0.25, model_id=None):
    """Run detection on one image with the requested model.

    Args:
        image: Input picture (the image tab passes a PIL image).
        threshold: Confidence threshold assigned to ``model.conf``.
        model_id: Hub repo id of the model to use. ``None`` means "keep the
            currently loaded model".

    Returns:
        A ``(output_image, count_result)`` tuple: the rendered detections as
        a PIL image, and the per-class counts obtained from
        ``value_counts('name')`` on the detections table.
    """
    global current_model_id
    global model

    # The signature default used to be forwarded to yolov5.load(None).
    if model_id is None:
        model_id = current_model_id

    # Swap the shared model only when a different one is requested.
    if model_id != current_model_id:
        model = yolov5.load(model_id)
        current_model_id = model_id

    input_size = _get_input_size(model_id)

    model.conf = threshold
    results = model(image, size=input_size)
    count_result = results.pandas().xyxy[0].value_counts('name')
    output_image = Image.fromarray(results.render()[0])
    return output_image, count_result


def show_pred_vid(
    video_path: str = None,
    model_path: str = None,
    image_size: int = 640,
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45,
):
    """Yield annotated frames of a video, one per decoded frame.

    The previous version called an un-imported ``YOLO`` class, rebuilt the
    model for every frame and threw the annotated frame away. The model is
    now loaded once through ``yolov5`` (the package this file already uses)
    and every rendered frame is yielded to the caller.

    Args:
        video_path: Path of the video file to read.
        model_path: Hub repo id / path given to ``yolov5.load``. Falls back
            to the currently loaded model id when empty.
        image_size: Inference size passed to the model call.
        conf_threshold: Confidence threshold.
        iou_threshold: IoU threshold used for non-maximum suppression.

    Yields:
        Rendered frames as returned by ``results.render()``.
        NOTE(review): frames are converted BGR -> RGB before inference, so
        the rendered output is expected to be RGB - confirm on a sample clip.
    """
    if not video_path:
        # Nothing uploaded: end the generator without opening a capture.
        return

    detector = yolov5.load(model_path or current_model_id)
    detector.conf = conf_threshold
    detector.iou = iou_threshold
    detector.agnostic = False
    detector.max_det = 1000

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break
            # OpenCV decodes to BGR; convert before handing to the model.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = detector(rgb_frame, size=int(image_size))
            yield results.render()[0]
    finally:
        # Release the capture even if inference fails or the consumer stops
        # iterating. No window is ever opened here, so the former
        # waitKey()/destroyAllWindows() calls were dropped.
        cap.release()


# Video tab components, written with the same component API as the image tab.
inputs_vid = [
    gr.Video(label="Input Video"),
    gr.Dropdown(
        ["linhcuem/yolov5_chamdiem_ver13"],
        value="linhcuem/yolov5_chamdiem_ver13",
        label="Model",
    ),
    gr.Slider(minimum=320, maximum=1280, value=640, step=32, label="Image Size"),
    gr.Slider(minimum=0.0, maximum=1.0, value=0.25, step=0.05, label="Confidence Threshold"),
    gr.Slider(minimum=0.0, maximum=1.0, value=0.45, step=0.05, label="IOU Threshold"),
]

# show_pred_vid yields numpy frames, so the output is a numpy image.
outputs_vid = gr.Image(type="numpy", label="Output Video")

interface_vid = gr.Interface(
    fn=show_pred_vid,
    inputs=inputs_vid,
    outputs=outputs_vid,
    title=app_title,
    description=description,
    cache_examples=False,
    theme='huggingface',
)

interface_image = gr.Interface(
    title=app_title,
    description=description,
    fn=predict,
    inputs=[
        gr.Image(type="pil"),
        gr.Slider(maximum=1, step=0.01, value=0.25),
        gr.Dropdown(models_ids, value=models_ids[-1]),
    ],
    outputs=[gr.Image(type="pil"), gr.Textbox(show_label=False)],
    examples=examples,
    # Was ``True if examples else Fale`` (typo -> NameError on empty list).
    cache_examples=bool(examples),
)

demo = gr.TabbedInterface(
    [interface_image, interface_vid],
    tab_names=['Image inferece', 'Video inference'],
)
# Generator handlers (the video tab) need the request queue to stream.
demo.queue()
demo.launch()