Muhammad Anas Akhtar committed on
Commit
53319ee
·
verified ·
1 Parent(s): 0c2bcb4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image, ImageDraw, ImageFont
3
+ from transformers import pipeline
4
+ import cv2
5
+ import numpy as np
6
+ import tempfile
7
+ import os
8
+
9
# Build the DETR object-detection pipeline once at import time so that every
# call in this module shares the same loaded model.
object_detector = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-50",
)
12
+
13
def draw_bounding_boxes(frame, detections):
    """
    Return a copy of ``frame`` annotated with one labelled box per detection.

    ``frame`` is converted BGR -> RGB for drawing with PIL and converted back
    RGB -> BGR before being returned. Each entry of ``detections`` is read
    through its ``'box'`` (``xmin``/``ymin``/``xmax``/``ymax``), ``'label'``
    and ``'score'`` keys.
    """
    # PIL does the drawing, so move the frame into an RGB PIL image first.
    canvas = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    painter = ImageDraw.Draw(canvas)
    label_font = ImageFont.load_default()

    for item in detections:
        coords = item['box']
        left, top, right, bottom = (
            int(coords[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')
        )

        # Outline of the detected object.
        painter.rectangle([(left, top), (right, bottom)], outline="red", width=3)

        # Caption: "<label> <score to two decimals>".
        caption = f"{item['label']} {item['score']:.2f}"

        # Fill the caption's extent first so the white text stays readable.
        cap_left, cap_top, cap_right, cap_bottom = painter.textbbox(
            (left, top), caption, font=label_font
        )
        painter.rectangle([(cap_left, cap_top), (cap_right, cap_bottom)], fill="red")
        painter.text((left, top), caption, fill="white", font=label_font)

    # Hand the result back in the same channel order it arrived in.
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
51
+
52
def process_video(video_path):
    """
    Run object detection over a video and write an annotated copy.

    Args:
        video_path: Path of the input video, as accepted by
            ``cv2.VideoCapture``.

    Returns:
        Path of a temporary ``.mp4`` file holding the annotated video, or
        ``None`` when the input cannot be opened, contains no readable
        frames, the output writer cannot be created, or any error occurs
        while processing. On failure the temporary output file is removed.
    """
    cap = None
    out = None
    output_path = None
    succeeded = False

    try:
        # Open the video file
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return None

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes
        # playback speed, and some containers report 0 (or NaN), which the
        # writer cannot use. Fall back to a conventional 30 fps in that case.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps != fps or fps <= 0:
            fps = 30.0
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Reserve a temporary path for the output video; the handle is closed
        # right away so the video writer can open the path itself.
        temp_output = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
        output_path = temp_output.name
        temp_output.close()

        # Initialize video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            print("Error processing video: could not open output video writer")
            return None

        frame_count = 0
        # The reported frame count can be 0 or missing for some inputs; it is
        # only used for the progress display.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Process every nth frame to speed up processing
        process_every_n_frames = 2  # Adjust this value to process more or fewer frames

        # Most recent detections, reused on the frames that skip inference so
        # the boxes do not flicker on and off between processed frames.
        detections = []

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Only run the detector on every nth frame
            if frame_count % process_every_n_frames == 0:
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # The pipeline's image loader takes a path/URL or a PIL image,
                # not a raw numpy array, so wrap the frame first.
                detections = object_detector(Image.fromarray(frame_rgb))

            if detections:
                frame = draw_bounding_boxes(frame, detections)

            # Write the frame
            out.write(frame)

            # Print progress (skipped when the total is unknown to avoid a
            # division by zero)
            if total_frames > 0:
                progress = (frame_count / total_frames) * 100
                print(f"Processing: {progress:.1f}% complete", end='\r')

        if frame_count == 0:
            # Nothing could be decoded; an empty output file is of no use.
            return None

        succeeded = True
        return output_path

    except Exception as e:
        print(f"Error processing video: {str(e)}")
        return None

    finally:
        # Always release the capture and the writer, including on error, so
        # file handles are not leaked and the output file is finalized before
        # its path reaches the caller.
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()
        # Do not leave orphaned temp files behind when no result is returned.
        if not succeeded and output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
116
+
117
def detect_objects_in_video(video):
    """
    Gradio callback: annotate the uploaded video with detected objects.

    Returns whatever ``process_video`` returns for ``video`` (the output path,
    or ``None`` on failure). Returns ``None`` when no video was supplied or
    when ``process_video`` raises.
    """
    if video is not None:
        try:
            # A ``None`` result from process_video is passed straight through.
            return process_video(video)
        except Exception as e:
            print(f"Error during video processing: {str(e)}")
    return None
135
+
136
# Gradio UI: one uploaded video in, one annotated video out.
demo = gr.Interface(
    fn=detect_objects_in_video,
    inputs=[gr.Video(label="Upload Video")],
    outputs=[gr.Video(label="Processed Video")],
    title="@GenAILearniverse Project: Video Object Detection",
    description="""
Upload a video to detect and track objects within it.
The application will process the video and draw bounding boxes around detected objects
with their labels and confidence scores.
Note: Processing may take some time depending on the video length.
""",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()