import gradio as gr
import torch
import os
import requests
import json
import cv2
from PIL import Image
from huggingface_hub import hf_hub_download
from ultralyticsplus import YOLO, render_result

# Sample images offered as clickable examples in the image tab.
_EXAMPLE_IMAGES = (
    'test_images/2a998cfb0901db5f8210.jpg',
    'test_images/2ce19ce0191acb44920b.jpg',
    'test_images/2daab6ea3310e14eb801.jpg',
    'test_images/4a137deefb14294a7005 (1).jpg',
    'test_images/7e77c596436c9132c87d.jpg',
    'test_images/170f914014bac6e49fab.jpg',
    'test_images/3355ec3269c8bb96e2d9.jpg',
    'test_images/546306a88052520c0b43.jpg',
    'test_images/33148464019ed3c08a8f.jpg',
    'test_images/a17a992a1cd0ce8e97c1.jpg',
    'test_images/b5db5e42d8b80ae653a9 (1).jpg',
    'test_images/b8ee1f5299a84bf612b9.jpg',
    'test_images/b272fec7783daa63f32c.jpg',
    'test_images/bb202b3eaec47c9a25d5.jpg',
    'test_images/bf1e22b0a44a76142f5b.jpg',
    'test_images/ea5473c5f53f27617e2e.jpg',
    'test_images/ee106392e56837366e79.jpg',
    'test_images/f88d2214a4ee76b02fff.jpg',
)

# One example row per image, in the positional order of the image
# interface inputs: [image, model, image size, confidence, IoU].
image_path = [
    [example, 'cham_diem_yolov8', 640, 0.25, 0.45]
    for example in _EXAMPLE_IMAGES
]

# Load the YOLO model once at import time; both inference functions share it.
# Alternative checkpoints the author left commented out:
#   'linhcuem/cham_diem_yolov8', 'linhcuem/chamdiemgianhang_yolov8_ver1'
model = YOLO('linhcuem/cham_diem_yolov8_ver20')
###################################################
def yolov8_img_inference(
    image,
    model_path=None,
    image_size=640,
    conf_threshold=0.25,
    iou_threshold=0.45,
):
    """Detect objects in one image and count the detections per class.

    Args:
        image: Input image; passed unchanged to ``model.predict`` and
            ``render_result``.
        model_path: Ignored. Inference always uses the module-level
            ``model``; the parameter only keeps the signature aligned with
            the UI inputs and example rows.
        image_size: Inference size forwarded to ``model.predict`` as
            ``imgsz`` (cast to ``int``).
        conf_threshold: Confidence threshold forwarded as ``conf``.
        iou_threshold: IoU threshold forwarded as ``iou``.

    Returns:
        A ``(render, counts)`` tuple: ``render`` is the value returned by
        ``render_result`` for the first result, and ``counts`` maps each
        detected class name to its number of detections. Classes with no
        detections are omitted.
    """
    from collections import Counter

    # Mirrored onto the shared model object as the original code did; the
    # thresholds that matter are the ones passed to predict() below.
    model.conf = conf_threshold
    model.iou = iou_threshold

    results = model.predict(
        image,
        imgsz=int(image_size),
        conf=conf_threshold,
        iou=iou_threshold,
    )
    render = render_result(model=model, image=image, result=results[0])

    # Mapping of class id -> class name.
    names = model.names

    detected = Counter(
        int(class_id) for result in results for class_id in result.boxes.cls
    )

    # Known classes first, in the model's own class order.
    counts = {
        label: detected[class_id]
        for class_id, label in names.items()
        if detected[class_id]
    }
    # Ids the model has no name for: previously their count was reset to 1 on
    # every hit and the final name lookup raised KeyError. Report them under
    # their numeric id instead.
    for class_id in sorted(detected.keys() - names.keys()):
        counts[str(class_id)] = detected[class_id]

    return render, counts
def yolov8_vid_inference(video_path):
    """Stream annotated frames for the video at ``video_path``.

    Each frame is run through the module-level ``model``; every predicted
    box is drawn as a red rectangle on a copy of the frame.

    Args:
        video_path: Path (or any source accepted by ``cv2.VideoCapture``)
            of the video to process.

    Yields:
        The annotated frame converted from BGR to RGB, one per decoded
        video frame.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # A failed read means end of stream (or a decode error).
                # Without this break the loop never terminates, because the
                # capture stays "opened" after the last frame.
                break

            annotated = frame.copy()
            outputs = model.predict(source=frame)
            results = outputs[0].cpu().numpy()
            for det in results.boxes.xyxy:
                cv2.rectangle(
                    annotated,
                    (int(det[0]), int(det[1])),
                    (int(det[2]), int(det[3])),
                    color=(0, 0, 255),
                    thickness=2,
                    lineType=cv2.LINE_AA,
                )
            yield cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
    finally:
        # Always free the capture, including when the consumer stops
        # iterating early and the generator is closed.
        cap.release()


# --- Video tab -------------------------------------------------------------
inputs_vid = [
    # Video has no ``type`` option, so the keyword was dropped.
    gr.components.Video(label="Input Video"),
]
outputs_vid = [
    gr.components.Image(type="numpy", label="Output Image"),
]
interface_vid = gr.Interface(
    fn=yolov8_vid_inference,
    inputs=inputs_vid,
    outputs=outputs_vid,
    title="Detect Thiên Việt productions",
    cache_examples=False,
)

# --- Image tab -------------------------------------------------------------
# NOTE: this labelled input list is not wired into any interface below;
# interface_image builds its own equivalent (unlabelled) inputs.
inputs_image = [
    gr.Image(type="pil"),
    gr.Dropdown(
        ["linhcuem/chamdiemgianhang_yolov8_ver1"],
        value="linhcuem/chamdiemgianhang_yolov8_ver1",
        label="Model",
    ),
    gr.Slider(maximum=1280, step=32, value=640, label="Image Size"),
    gr.Slider(maximum=1.0, step=0.05, value=0.25, label="Confidence Threshold"),
    gr.Slider(maximum=1.0, step=0.05, value=0.45, label="IOU Threshold"),
]

title = "Detect Thiên Việt productions"

interface_image = gr.Interface(
    fn=yolov8_img_inference,
    inputs=[
        gr.Image(type='pil'),
        # ``value`` (not ``default``) sets the initially selected choice.
        gr.Dropdown(
            ["linhcuem/chamdiemgianhang_yolov8_ver1"],
            value="linhcuem/chamdiemgianhang_yolov8_ver1",
        ),
        gr.Slider(maximum=1280, step=32, value=640),
        gr.Slider(maximum=1.0, step=0.05, value=0.25),
        gr.Slider(maximum=1.0, step=0.05, value=0.45),
    ],
    outputs=[gr.Image(type="pil"), gr.Textbox(show_label=False)],
    title=title,
    examples=image_path,
    # Only pre-compute example outputs when there are examples to cache.
    cache_examples=bool(image_path),
)

# The video function is a generator, so the queue must be enabled for its
# frames to be streamed to the browser.
gr.TabbedInterface(
    [interface_image, interface_vid],
    tab_names=['Image inference', 'Video inference'],
).queue().launch()