import os
import tempfile

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline
|
|
|
|
|
# Built once at import time so every request reuses the same detector
# instead of constructing a new pipeline per video.
object_detector = pipeline(
    "object-detection",
    model="facebook/detr-resnet-50",
)
|
|
|
def draw_bounding_boxes(frame, detections):
    """Draw labelled bounding boxes on a video frame.

    Args:
        frame: Image array in OpenCV's BGR channel order (it is converted
            with ``cv2.COLOR_BGR2RGB`` before drawing).
        detections: Iterable of dicts, each holding a ``'box'`` mapping with
            ``'xmin'``, ``'ymin'``, ``'xmax'`` and ``'ymax'`` entries, plus a
            ``'label'`` and a numeric ``'score'``.

    Returns:
        A new BGR image array with a red rectangle and a white-on-red
        ``"<label> <score>"`` caption for every detection. The input frame
        is not modified.
    """
    # PIL draws in RGB while OpenCV frames are BGR: convert on the way in
    # and back again on the way out.
    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)
    font = ImageFont.load_default()

    for detection in detections:
        box = detection['box']
        xmin, ymin = int(box['xmin']), int(box['ymin'])
        xmax, ymax = int(box['xmax']), int(box['ymax'])

        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=3)

        text = f"{detection['label']} {detection['score']:.2f}"

        # Paint a filled background exactly the size of the caption so the
        # white text stays readable on any frame content.
        left, top, right, bottom = draw.textbbox((xmin, ymin), text, font=font)
        draw.rectangle([(left, top), (right, bottom)], fill="red")
        draw.text((xmin, ymin), text, fill="white", font=font)

    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
|
|
|
def process_video(video_path):
    """Annotate every frame of a video with detected objects.

    Reads ``video_path`` with OpenCV, runs the module-level
    ``object_detector`` on every ``process_every_n_frames``-th frame
    (starting with the first), draws the most recent detections on each
    frame with ``draw_bounding_boxes`` and writes the result to a temporary
    ``.mp4`` file.

    Args:
        video_path: Path of the input video, passed to ``cv2.VideoCapture``.

    Returns:
        Path of the annotated temporary ``.mp4`` file, or ``None`` when the
        input cannot be opened, yields no frames, the output writer cannot
        be created, or any error occurs during processing. The temporary
        file is not deleted on success; the caller owns it.
    """
    cap = None
    out = None
    output_path = None
    succeeded = False

    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return None

        # Keep the rate as a float: truncating e.g. 29.97 to 29 changes the
        # playback speed of the output. ``not fps > 0`` also catches NaN and
        # the 0 that OpenCV returns for an unsupported property.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0  # arbitrary but sane fallback when the rate is unknown

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # May be 0 or negative when the container does not report a count.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Only the unique name is needed; OpenCV reopens the path itself.
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_output:
            output_path = temp_output.name

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            print("Error processing video: could not open the output video writer")
            return None

        # Detection is the expensive step, so it only runs on every n-th
        # frame; the frames in between reuse the latest detections so the
        # boxes do not flicker on and off.
        process_every_n_frames = 2
        detections = []
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % process_every_n_frames == 0:
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # The pipeline takes PIL images (or paths/URLs), not raw
                # NumPy arrays.
                detections = object_detector(Image.fromarray(frame_rgb))
            frame_count += 1

            if detections:
                frame = draw_bounding_boxes(frame, detections)
            out.write(frame)

            # Skip the percentage when the total is unknown instead of
            # dividing by zero.
            if total_frames > 0:
                progress = (frame_count / total_frames) * 100
                print(f"Processing: {progress:.1f}% complete", end='\r')

        if frame_count == 0:
            print("Error processing video: no frames could be read")
            return None

        succeeded = True
        return output_path

    except Exception as e:
        # Boundary for the UI callback: report the failure and signal it
        # with None rather than propagating.
        print(f"Error processing video: {e}")
        return None

    finally:
        # Release before the path is handed back (the writer finalises the
        # file on release) and before any attempt to delete it.
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()
        if not succeeded and output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                # Best effort: a leftover temp file must not mask the
                # original failure.
                pass
|
|
|
def detect_objects_in_video(video):
    """Gradio callback that annotates an uploaded video with detections.

    Args:
        video: Value supplied by the video input; it is forwarded unchanged
            to ``process_video`` as the video path. ``None`` means nothing
            was uploaded.

    Returns:
        Whatever ``process_video`` returns (the processed video path, or
        ``None`` on failure), or ``None`` when ``video`` is ``None`` or an
        exception is raised.
    """
    if video is None:
        return None

    try:
        # process_video already reports failure as None, so its result can
        # be passed straight through.
        return process_video(video)
    except Exception as e:
        print(f"Error during video processing: {e}")
        return None
|
|
|
|
|
# Single-input, single-output UI: upload a video, get the annotated video.
demo = gr.Interface(
    fn=detect_objects_in_video,
    inputs=[
        gr.Video(label="Upload Video"),
    ],
    outputs=[
        gr.Video(label="Processed Video"),
    ],
    title="@GenAILearniverse Project: Video Object Detection",
    # Built from one literal per line so the user-facing text carries no
    # stray indentation or separator characters.
    description=(
        "\n"
        "Upload a video to detect and track objects within it.\n"
        "The application will process the video and draw bounding boxes around detected objects\n"
        "with their labels and confidence scores.\n"
        "Note: Processing may take some time depending on the video length.\n"
    ),
)
|
|
|
# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()