Spaces:

ANASAKHTAR
/

Object_detection_from_Video

Running

Muhammad Anas Akhtar

Create app.py

53319ee verified about 2 months ago

4.77 kB

	import gradio as gr
	from PIL import Image, ImageDraw, ImageFont
	from transformers import pipeline
	import cv2
	import numpy as np
	import tempfile
	import os

	# Initialize the object detection pipeline
	object_detector = pipeline("object-detection",
	model="facebook/detr-resnet-50")

	def draw_bounding_boxes(frame, detections):
	"""
	Draws bounding boxes on the video frame based on the detections.
	"""
	# Convert numpy array to PIL Image
	frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	pil_image = Image.fromarray(frame_rgb)
	draw = ImageDraw.Draw(pil_image)

	# Use default font
	font = ImageFont.load_default()

	for detection in detections:
	box = detection['box']
	xmin = int(box['xmin'])
	ymin = int(box['ymin'])
	xmax = int(box['xmax'])
	ymax = int(box['ymax'])

	# Draw the bounding box
	draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=3)

	# Create label with score
	label = detection['label']
	score = detection['score']
	text = f"{label} {score:.2f}"

	# Draw text with background rectangle for visibility
	text_bbox = draw.textbbox((xmin, ymin), text, font=font)
	draw.rectangle([
	(text_bbox[0], text_bbox[1]),
	(text_bbox[2], text_bbox[3])
	], fill="red")
	draw.text((xmin, ymin), text, fill="white", font=font)

	# Convert back to numpy array
	frame_with_boxes = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
	return frame_with_boxes

	def process_video(video_path):
	"""
	Process the video file and return the path to the processed video
	"""
	try:
	# Open the video file
	cap = cv2.VideoCapture(video_path)
	if not cap.isOpened():
	return None

	# Get video properties
	fps = int(cap.get(cv2.CAP_PROP_FPS))
	frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

	# Create temporary file for output video
	temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
	output_path = temp_output.name
	temp_output.close()

	# Initialize video writer
	fourcc = cv2.VideoWriter_fourcc(*'mp4v')
	out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

	frame_count = 0
	total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

	# Process every nth frame to speed up processing
	process_every_n_frames = 2 # Adjust this value to process more or fewer frames

	while cap.isOpened():
	ret, frame = cap.read()
	if not ret:
	break

	frame_count += 1

	# Only process every nth frame
	if frame_count % process_every_n_frames == 0:
	# Convert frame to RGB for the model
	frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

	# Detect objects
	detections = object_detector(frame_rgb)

	# Draw bounding boxes
	frame = draw_bounding_boxes(frame, detections)

	# Write the frame
	out.write(frame)

	# Print progress
	progress = (frame_count / total_frames) * 100
	print(f"Processing: {progress:.1f}% complete", end='\r')

	# Release everything
	cap.release()
	out.release()

	return output_path

	except Exception as e:
	print(f"Error processing video: {str(e)}")
	return None

	def detect_objects_in_video(video):
	"""
	Gradio interface function for video object detection
	"""
	if video is None:
	return None

	try:
	# Process the video
	output_path = process_video(video)
	if output_path is None:
	return None

	return output_path

	except Exception as e:
	print(f"Error during video processing: {str(e)}")
	return None

	# Create the Gradio interface
	demo = gr.Interface(
	fn=detect_objects_in_video,
	inputs=[
	gr.Video(label="Upload Video")
	],
	outputs=[
	gr.Video(label="Processed Video")
	],
	title="@GenAILearniverse Project: Video Object Detection",
	description="""
	Upload a video to detect and track objects within it.
	The application will process the video and draw bounding boxes around detected objects
	with their labels and confidence scores.
	Note: Processing may take some time depending on the video length.
	"""
	)

	if __name__ == "__main__":
	demo.launch()