Spaces:

ANASAKHTAR
/

Object_detection_from_Video

Running

App Files Files Community

Muhammad Anas Akhtar committed on Dec 7, 2024

Commit

53319ee

verified ·

1 Parent(s): 0c2bcb4

Create app.py

Browse files

Files changed (1) hide show

app.py +155 -0

app.py ADDED Viewed

	@@ -0,0 +1,155 @@

+import gradio as gr
+from PIL import Image, ImageDraw, ImageFont
+from transformers import pipeline
+import cv2
+import numpy as np
+import tempfile
+import os
+# Build the object-detection pipeline once, at module load, so that every
+# call into the detector below reuses the same loaded model.
+object_detector = pipeline(
+    task="object-detection",
+    model="facebook/detr-resnet-50",
+)
+def draw_bounding_boxes(frame, detections):
+    """
+    Return a copy of a BGR video frame annotated with the given detections.
+
+    Each detection is a mapping with a 'box' entry (holding 'xmin', 'ymin',
+    'xmax' and 'ymax'), a 'label' and a 'score'. For every detection a red
+    rectangle is drawn around the box and a "<label> <score>" caption is
+    written on a red background at the box's top-left corner.
+
+    The drawing is done with Pillow, so the frame is converted BGR -> RGB on
+    the way in and RGB -> BGR on the way out.
+    """
+    canvas = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+    painter = ImageDraw.Draw(canvas)
+    # Pillow's built-in bitmap font; no font file is needed
+    label_font = ImageFont.load_default()
+    for item in detections:
+        corners = item['box']
+        left, top, right, bottom = (
+            int(corners[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')
+        )
+        # Outline of the detected object
+        painter.rectangle([(left, top), (right, bottom)], outline="red", width=3)
+        # Caption: class label followed by the confidence to two decimals
+        caption = f"{item['label']} {item['score']:.2f}"
+        anchor = (left, top)
+        # Fill the caption's extent first so the white text stays readable
+        bg_left, bg_top, bg_right, bg_bottom = painter.textbbox(
+            anchor, caption, font=label_font
+        )
+        painter.rectangle([(bg_left, bg_top), (bg_right, bg_bottom)], fill="red")
+        painter.text(anchor, caption, fill="white", font=label_font)
+    # Hand the result back in OpenCV's BGR channel order
+    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
+def process_video(video_path, process_every_n_frames=2):
+    """
+    Run object detection over a video and write an annotated copy of it.
+
+    Args:
+        video_path: Path of the input video; opened with cv2.VideoCapture.
+        process_every_n_frames: Run the detector only on every n-th frame to
+            speed up processing (values below 1 are treated as 1). Frames in
+            between reuse the most recent detections so the boxes do not
+            flicker on and off.
+
+    Returns:
+        Path of a temporary .mp4 file holding the annotated video, or None
+        when the input cannot be opened, the output writer cannot be
+        created, or an error occurs while processing. On failure the
+        temporary file is removed again.
+    """
+    cap = None
+    out = None
+    output_path = None
+    succeeded = False
+    try:
+        # Open the video file
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            return None
+        # Get video properties. Keep the frame rate as a float: truncating
+        # e.g. 29.97 to 29 changes the duration of the output video.
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        if not fps > 0:
+            # Missing/invalid metadata (0 or NaN); fall back to a sane rate
+            fps = 30.0
+        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        # Frame count comes from container metadata and may be 0 (unknown)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        # Create temporary file for output video
+        temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+        output_path = temp_output.name
+        temp_output.close()
+        # Initialize video writer
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
+        if not out.isOpened():
+            print("Error processing video: could not open the output video writer")
+            return None
+        step = max(1, int(process_every_n_frames))
+        frame_count = 0
+        detections = []
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            frame_count += 1
+            # Only run the (expensive) detector on every nth frame
+            if frame_count % step == 0:
+                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                # The pipeline is documented to take PIL images (or paths /
+                # URLs), not raw numpy arrays, so wrap the frame first.
+                detections = object_detector(Image.fromarray(frame_rgb))
+            # Draw the latest known detections on every frame
+            if detections:
+                frame = draw_bounding_boxes(frame, detections)
+            # Write the frame
+            out.write(frame)
+            # Print progress; a percentage needs a known total frame count
+            if total_frames > 0:
+                progress = (frame_count / total_frames) * 100
+                print(f"Processing: {progress:.1f}% complete", end='\r')
+            else:
+                print(f"Processing: frame {frame_count}", end='\r')
+        succeeded = True
+        return output_path
+    except Exception as e:
+        print(f"Error processing video: {str(e)}")
+        return None
+    finally:
+        # Release everything, on success and on every failure path alike
+        if cap is not None:
+            cap.release()
+        if out is not None:
+            out.release()
+        # Do not leave a partial/empty temp file behind when we failed
+        if not succeeded and output_path is not None:
+            try:
+                os.remove(output_path)
+            except OSError:
+                pass
+def detect_objects_in_video(video):
+    """
+    Gradio callback: annotate the uploaded video with detected objects.
+
+    Returns the path produced by process_video, or None when no video was
+    supplied or processing failed (errors are printed, not raised).
+    """
+    if video is None:
+        return None
+    try:
+        # process_video already yields None on failure, so its result can be
+        # passed straight through
+        return process_video(video)
+    except Exception as e:
+        print(f"Error during video processing: {str(e)}")
+        return None
+# Gradio UI: one uploaded video in, one annotated video out
+demo = gr.Interface(
+    title="@GenAILearniverse Project: Video Object Detection",
+    description="""
+    Upload a video to detect and track objects within it.
+    The application will process the video and draw bounding boxes around detected objects
+    with their labels and confidence scores.
+    Note: Processing may take some time depending on the video length.
+    """,
+    fn=detect_objects_in_video,
+    inputs=[gr.Video(label="Upload Video")],
+    outputs=[gr.Video(label="Processed Video")],
+)
+# Launch the app only when this file is run as a script
+if __name__ == "__main__":
+    demo.launch()