binuser007 committed on
Commit
b41b5c5
·
verified ·
1 Parent(s): 4902f03

Upload 3 files

Browse files
Files changed (3) hide show
  1. config.py +40 -0
  2. h.py +429 -0
  3. requirements.txt +12 -0
config.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+
3
class Config:
    """Static settings for the YOLO detection app.

    Groups the selectable model weights (with the descriptions displayed next
    to them), the accepted upload extensions, temporary/video output settings
    and the drawing parameters used when annotating detections.
    """

    # --- Model catalogue: weights file name -> description -----------------
    # Kept as three task-specific groups and merged in display order below.
    _DETECTION_MODELS = {
        "yolov8n.pt": "YOLOv8 Nano - Fastest and smallest model, best for CPU/edge devices",
        "yolov8s.pt": "YOLOv8 Small - Good balance of speed and accuracy",
        "yolov8m.pt": "YOLOv8 Medium - Better accuracy, still reasonable speed",
        "yolov8l.pt": "YOLOv8 Large - High accuracy, slower speed",
        "yolov8x.pt": "YOLOv8 XLarge - Highest accuracy, slowest speed",
    }
    _POSE_MODELS = {
        "yolov8n-pose.pt": "YOLOv8 Nano Pose - Fast pose estimation",
        "yolov8s-pose.pt": "YOLOv8 Small Pose - Balanced pose estimation",
        "yolov8m-pose.pt": "YOLOv8 Medium Pose - Accurate pose estimation",
        "yolov8l-pose.pt": "YOLOv8 Large Pose - High accuracy pose estimation",
        "yolov8x-pose.pt": "YOLOv8 XLarge Pose - Most accurate pose estimation",
    }
    _SEGMENTATION_MODELS = {
        "yolov8n-seg.pt": "YOLOv8 Nano Segmentation - Fast instance segmentation",
        "yolov8s-seg.pt": "YOLOv8 Small Segmentation - Balanced segmentation",
        "yolov8m-seg.pt": "YOLOv8 Medium Segmentation - Accurate segmentation",
        "yolov8l-seg.pt": "YOLOv8 Large Segmentation - High accuracy segmentation",
        "yolov8x-seg.pt": "YOLOv8 XLarge Segmentation - Most accurate segmentation",
    }
    YOLO_MODELS = {**_DETECTION_MODELS, **_POSE_MODELS, **_SEGMENTATION_MODELS}

    # Model names in catalogue order, plus the one pre-selected by default.
    AVAILABLE_MODELS: List[str] = list(YOLO_MODELS)
    DEFAULT_MODEL: str = "yolov8s.pt"

    # --- Accepted upload extensions ----------------------------------------
    ALLOWED_IMAGE_TYPES: List[str] = ["jpg", "jpeg", "png"]
    ALLOWED_VIDEO_TYPES: List[str] = ["mp4", "mov", "avi"]

    # --- Video processing ---------------------------------------------------
    TEMP_DIR: str = "temp"  # working directory for intermediate/processed videos
    VIDEO_OUTPUT_FORMAT: str = "mp4v"  # FourCC code for the intermediate video

    # --- Annotation / UI ----------------------------------------------------
    # Detections below this confidence are not drawn; deliberately low so that
    # more objects are shown.
    CONFIDENCE_THRESHOLD: float = 0.25
    BBOX_COLOR: tuple = (0, 255, 0)
    FONT_SCALE: float = 0.5
    FONT_THICKNESS: int = 2
h.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import tempfile
4
+ import requests
5
+ import base64
6
+ import numpy as np
7
+ import logging
8
+ from dataclasses import dataclass
9
+ from typing import Optional, Union, Tuple
10
+ from PIL import Image
11
+ from io import BytesIO
12
+ from ultralytics import YOLO
13
+ import streamlit as st
14
+ import yt_dlp as youtube_dl
15
+ from config import Config
16
+ import time
17
+
18
+ # Configure logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
@dataclass
class DetectionResult:
    """Outcome of one detection run.

    Built positionally as ``DetectionResult(success, image)`` on success and
    as ``DetectionResult(False, error_message=...)`` on failure, so the field
    order below is part of the interface.
    """

    # True when detection completed; False when it failed.
    success: bool
    # Annotated image; left as None on failure.
    image: Optional[np.ndarray] = None
    # Human-readable failure reason; left as None on success.
    error_message: Optional[str] = None
28
+
29
class YOLOModel:
    """Wrap an Ultralytics ``YOLO`` model and draw its detections on images.

    Attributes:
        model_name: Weights name the wrapper was created with.
        model: The loaded ``YOLO`` instance, or ``None`` when loading failed
            (loading errors are logged, not raised).
    """

    # Vertical gap, in pixels, between a box's top edge and its label baseline.
    _LABEL_OFFSET = 10

    def __init__(self, model_name: str = Config.DEFAULT_MODEL):
        self.model_name = model_name
        self.model = self._load_model(model_name)

    def _load_model(self, model_name: str) -> Optional[YOLO]:
        """Load the YOLO weights, returning ``None`` (and logging) on failure."""
        try:
            return YOLO(model_name)
        except Exception as e:
            # logger.exception keeps the traceback, which the bare message lost.
            logger.exception("Error loading model: %s", e)
            return None

    @staticmethod
    def _to_model_input(image: np.ndarray) -> np.ndarray:
        """Return ``image`` in the channel order the model expects.

        Ultralytics interprets numpy inputs as BGR, while the callers in this
        file hand over RGB arrays, so 3-channel images are flipped here.
        Anything else is passed through unchanged.
        """
        if image.ndim == 3 and image.shape[2] == 3:
            # Contiguous copy: downstream OpenCV code may reject reversed views.
            return np.ascontiguousarray(image[:, :, ::-1])
        return image

    def detect_objects(self, image: np.ndarray) -> DetectionResult:
        """Run detection on ``image`` and return an annotated copy.

        Args:
            image: Image array in RGB channel order, as supplied by the image
                and video processors in this file. It is not modified.

        Returns:
            ``DetectionResult(True, annotated)`` where ``annotated`` is a copy
            of ``image`` (same channel order) with a box and a
            ``"<label> <confidence>"`` caption for every detection at or above
            ``Config.CONFIDENCE_THRESHOLD``; or a failed ``DetectionResult``
            carrying the error text when the model is missing or inference
            raises.
        """
        if self.model is None:
            return DetectionResult(False, error_message="Model not loaded")

        try:
            results = self.model(self._to_model_input(image))
            annotated_image = image.copy()

            # Lowest baseline that still leaves room for the glyphs above it.
            # Rough allowance scaled from the font size - tune if fonts change.
            min_baseline = int(30 * Config.FONT_SCALE)

            for box in results[0].boxes:
                confidence = box.conf.item()
                if confidence < Config.CONFIDENCE_THRESHOLD:
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0])
                label = self.model.names[int(box.cls)]

                cv2.rectangle(
                    annotated_image,
                    (x1, y1),
                    (x2, y2),
                    Config.BBOX_COLOR,
                    2
                )
                # Clamp the caption so boxes touching the top edge still get a
                # visible label instead of one drawn above the image.
                text_y = max(y1 - self._LABEL_OFFSET, min_baseline)
                cv2.putText(
                    annotated_image,
                    f'{label} {confidence:.2f}',
                    (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    Config.FONT_SCALE,
                    Config.BBOX_COLOR,
                    Config.FONT_THICKNESS
                )

            return DetectionResult(True, annotated_image)
        except Exception as e:
            logger.exception("Error during object detection: %s", e)
            return DetectionResult(False, error_message=str(e))
82
+
83
class ImageProcessor:
    """Load still images (PIL objects, http(s) URLs or data URIs) and run detection."""

    # Seconds to wait for a remote image; without a timeout requests.get can
    # block the app indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 10

    def __init__(self, model: YOLOModel):
        self.model = model

    def process_image(self, image: Union[Image.Image, str]) -> DetectionResult:
        """Run detection on a PIL image or on the image a URL points to.

        Args:
            image: A PIL image, or a string that is either an http(s) URL or a
                ``data:image...`` base64 URI.

        Returns:
            The model's ``DetectionResult``; a failed result with a message
            when the image cannot be loaded or processing raises.
        """
        try:
            if isinstance(image, str):
                image = self._load_image_from_url(image)

            if image is None:
                return DetectionResult(False, error_message="Failed to load image")

            # Normalise to 3-channel RGB: RGBA, palette and greyscale images
            # would otherwise become 4-channel or 2-D arrays.
            np_image = np.array(image.convert("RGB"))
            return self.model.detect_objects(np_image)
        except Exception as e:
            logger.exception("Error processing image: %s", e)
            return DetectionResult(False, error_message=str(e))

    def _load_image_from_url(self, url: str) -> Optional[Image.Image]:
        """Open an image from a base64 data URI or by downloading ``url``.

        Returns ``None`` (after logging) on any decoding, network or HTTP error.
        """
        try:
            if url.startswith('data:image'):
                # Everything after the first comma is the base64 payload.
                _, encoded = url.split(',', 1)
                return Image.open(BytesIO(base64.b64decode(encoded)))

            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return Image.open(BytesIO(response.content))
        except Exception as e:
            logger.error(f"Error loading image from URL: {e}")
            return None
117
+
118
class VideoProcessor:
    """Annotate every frame of a video and re-encode it for browser playback."""

    # Frame rate used when the source does not report a usable one; a zero or
    # NaN rate would otherwise be handed straight to the video writer.
    _FALLBACK_FPS = 30.0

    def __init__(self, model: YOLOModel):
        self.model = model
        os.makedirs(Config.TEMP_DIR, exist_ok=True)

    def process_video(self, input_path: str) -> Tuple[bool, str]:
        """Run detection over ``input_path`` and write a browser-friendly MP4.

        Args:
            input_path: Path to an existing video file.

        Returns:
            ``(True, output_path)`` on success, where ``output_path`` lies in
            ``Config.TEMP_DIR``; ``(False, error_message)`` otherwise.
        """
        if not os.path.exists(input_path):
            return False, "Input video file not found"

        # Nanosecond stamp: two runs within the same second must not share
        # (and overwrite) an output file.
        output_filename = f"processed_{time.time_ns()}.mp4"
        temp_output = os.path.join(Config.TEMP_DIR, f"temp_{output_filename}")
        final_output = os.path.join(Config.TEMP_DIR, output_filename)

        try:
            annotated, error = self._annotate_to_file(input_path, temp_output)
            if not annotated:
                return False, error
            return self._transcode_for_browser(temp_output, final_output)
        except Exception as e:
            logger.exception("Error processing video: %s", e)
            return False, str(e)
        finally:
            # The intermediate file is never useful to the caller, whether the
            # run succeeded or not.
            self._discard(temp_output)

    def _annotate_to_file(self, input_path: str, output_path: str) -> Tuple[bool, str]:
        """Write an annotated copy of ``input_path`` to ``output_path``.

        Returns ``(True, "")`` on success or ``(False, reason)`` when the
        source or the writer cannot be opened.
        """
        cap = cv2.VideoCapture(input_path)
        out = None
        try:
            if not cap.isOpened():
                return False, "Failed to open video file"

            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Keep the fractional rate (e.g. 29.97); truncating it to an int
            # changes the playback speed/duration of the output.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:  # also true for NaN
                fps = self._FALLBACK_FPS

            # 'avc1' on Windows, the configured FourCC elsewhere. This file is
            # only an intermediate; ffmpeg produces the final H.264 stream.
            codec = 'avc1' if os.name == 'nt' else Config.VIDEO_OUTPUT_FORMAT
            out = cv2.VideoWriter(
                output_path,
                cv2.VideoWriter_fourcc(*codec),
                fps,
                (frame_width, frame_height)
            )
            if not out.isOpened():
                return False, "Failed to open video writer"

            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # The detector works on RGB; OpenCV reads and writes BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = self.model.detect_objects(rgb_frame)

                if result.success:
                    out.write(cv2.cvtColor(result.image, cv2.COLOR_RGB2BGR))
                else:
                    # Keep the original frame so the output stays in sync.
                    out.write(frame)

                frame_count += 1
                if frame_count % 30 == 0:  # Log progress every 30 frames
                    logger.info("Processed %d frames", frame_count)

            return True, ""
        finally:
            # Release exactly once, on every path. No destroyAllWindows():
            # this code never opens a GUI window.
            cap.release()
            if out is not None:
                out.release()

    def _transcode_for_browser(self, source: str, destination: str) -> Tuple[bool, str]:
        """Re-encode ``source`` to H.264/yuv420p with fast-start via ffmpeg.

        Returns ``(True, destination)`` on success or ``(False, reason)``.
        """
        # Local import: the module's import block is outside this class.
        import subprocess

        ffmpeg_cmd = [
            'ffmpeg',
            '-y',                       # Overwrite output file if it exists
            '-i', source,               # Input file
            '-c:v', 'libx264',          # Video codec
            '-preset', 'medium',        # Encoding speed preset
            '-movflags', '+faststart',  # Enable fast start for web playback
            '-pix_fmt', 'yuv420p',      # Pixel format for maximum compatibility
            destination                 # Output file
        ]

        try:
            completed = subprocess.run(
                ffmpeg_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
        except OSError as e:
            # Typically: the ffmpeg executable is not installed / not on PATH.
            logger.error(f"Error during ffmpeg conversion: {e}")
            return False, f"Error during video conversion: {str(e)}"

        if completed.returncode != 0:
            # ffmpeg output is not guaranteed to be valid UTF-8.
            details = completed.stderr.decode(errors="replace")
            logger.error("FFmpeg error: %s", details)
            self._discard(destination)  # do not leave a partial output behind
            return False, f"FFmpeg conversion failed: {details}"

        return True, destination

    @staticmethod
    def _discard(path: str) -> None:
        """Delete ``path`` if present; log instead of raising when that fails."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.warning("Could not remove temporary file %s: %s", path, e)
230
+
231
def download_youtube_video(youtube_url: str) -> Optional[str]:
    """Download a YouTube video into ``Config.TEMP_DIR``.

    Args:
        youtube_url: URL of the video to fetch.

    Returns:
        Path of the downloaded file, or ``None`` when the download fails or
        the expected file is not present afterwards.
    """
    try:
        ydl_opts = {
            'format': 'best[ext=mp4]',
            'outtmpl': os.path.join(Config.TEMP_DIR, '%(title)s.%(ext)s'),
            # A single video is expected; don't expand "watch?v=...&list=..."
            # links into the whole playlist.
            'noplaylist': True,
        }

        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=True)
            # Ask yt-dlp for the name it actually wrote. It sanitises titles
            # (e.g. path separators), so rebuilding the path from the raw
            # info['title'] misses the file for many videos.
            video_path = ydl.prepare_filename(info)

        return video_path if os.path.exists(video_path) else None

    except Exception as e:
        logger.error(f"Failed to retrieve video from YouTube: {e}")
        return None
247
+
248
def _remove_if_exists(path: str) -> None:
    """Delete a temporary file, logging (not raising) if that is impossible."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        logger.warning("Could not remove temporary file %s: %s", path, e)


def _show_image_result(result: "DetectionResult") -> None:
    """Render a detection result: the annotated image, or its error message."""
    if result.success:
        st.image(result.image, caption="Processed Image", use_container_width=True)
    else:
        st.error(result.error_message)


def _process_and_show_video(video_processor: "VideoProcessor", video_path: str,
                            progress_bar, status_text, start_progress: int) -> None:
    """Run detection on ``video_path`` and display the outcome with progress updates."""
    status_text.text("Processing video...")
    progress_bar.progress(start_progress)

    success, result = video_processor.process_video(video_path)
    progress_bar.progress(75)

    if success:
        status_text.text("Loading processed video...")
        st.video(result)
        status_text.text("Video processing complete!")
        progress_bar.progress(100)
    else:
        st.error(f"Failed to process video: {result}")


def main():
    """Streamlit entry point: pick a model, then detect on images or videos."""
    # Set page configuration
    st.set_page_config(
        page_title="YOLO Object Detection",
        page_icon="🔍",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    st.title("MULTIMEDIA OBJECT DETECTION USING YOLO")

    # Initialize session state
    if 'model' not in st.session_state:
        st.session_state['model'] = None

    # Model selection with description
    st.subheader("Model Selection")
    model_choice = st.selectbox(
        "Select YOLO Model",
        options=Config.AVAILABLE_MODELS,
        index=Config.AVAILABLE_MODELS.index(Config.DEFAULT_MODEL),
        format_func=lambda x: f"{x} - {Config.YOLO_MODELS[x]}"
    )

    # Display model capabilities
    if "pose" in model_choice:
        st.info("This model will detect and estimate human poses in the image/video.")
    elif "seg" in model_choice:
        st.info("This model will perform instance segmentation, creating precise masks for detected objects.")
    else:
        st.info("This model will detect and classify objects with bounding boxes.")

    # Initialize model and processors
    try:
        cached = st.session_state['model']
        # Reload when the selection changed, and also when the cached wrapper
        # holds no model: YOLOModel swallows load errors, so a failed load
        # would otherwise stay cached and never be retried.
        if cached is None or cached.model_name != model_choice or cached.model is None:
            with st.spinner("Loading YOLO model..."):
                st.session_state['model'] = YOLOModel(model_choice)
        model = st.session_state['model']
        image_processor = ImageProcessor(model)
        video_processor = VideoProcessor(model)
    except Exception as e:
        st.error(f"Error initializing model: {str(e)}")
        return

    if model.model is None:
        # Report the failure up front instead of on the first detection attempt.
        st.error(f"Error initializing model: could not load '{model_choice}'")
        return

    image_tab, video_tab = st.tabs(["Image Detection", "Video Detection"])

    with image_tab:
        st.header("Image Detection")
        # Explicit keys keep the two same-labelled radios independent.
        input_choice = st.radio(
            "Select Input Method", ["Upload", "URL"], key="image_input_method"
        )

        if input_choice == "Upload":
            uploaded_image = st.file_uploader(
                "Upload Image",
                type=Config.ALLOWED_IMAGE_TYPES,
                key="image_uploader"
            )
            if uploaded_image is not None:
                try:
                    with st.spinner("Processing image..."):
                        image = Image.open(uploaded_image)
                        result = image_processor.process_image(image)
                        _show_image_result(result)
                except Exception as e:
                    st.error(f"Error processing image: {str(e)}")

        elif input_choice == "URL":
            image_url = st.text_input("Image URL", key="image_url")
            if image_url:
                try:
                    with st.spinner("Processing image from URL..."):
                        result = image_processor.process_image(image_url)
                        _show_image_result(result)
                except Exception as e:
                    st.error(f"Error processing image URL: {str(e)}")

    with video_tab:
        st.header("Video Detection")
        video_choice = st.radio(
            "Select Input Method", ["Upload", "YouTube"], key="video_input_method"
        )

        if video_choice == "Upload":
            uploaded_video = st.file_uploader(
                "Upload Local Video",
                type=Config.ALLOWED_VIDEO_TYPES,
                key="video_uploader"
            )
            if uploaded_video is not None:
                # Create progress indicators
                progress_bar = st.progress(0)
                status_text = st.empty()
                # basename(): never let a client-supplied name escape TEMP_DIR.
                input_video_path = os.path.join(
                    Config.TEMP_DIR, os.path.basename(uploaded_video.name)
                )
                try:
                    # Save uploaded video
                    status_text.text("Saving uploaded video...")
                    with open(input_video_path, "wb") as f:
                        f.write(uploaded_video.getvalue())

                    _process_and_show_video(
                        video_processor, input_video_path,
                        progress_bar, status_text, 25
                    )
                except Exception as e:
                    st.error(f"Error processing video: {str(e)}")
                finally:
                    # Cleanup runs on every path so failed runs don't leave
                    # uploads behind.
                    _remove_if_exists(input_video_path)
                    status_text.empty()
                    progress_bar.empty()

        elif video_choice == "YouTube":
            video_url = st.text_input("YouTube Video URL", key="youtube_url")
            if video_url:
                # Create progress indicators
                progress_bar = st.progress(0)
                status_text = st.empty()
                video_path = None
                try:
                    # Download video
                    status_text.text("Downloading YouTube video...")
                    progress_bar.progress(25)

                    video_path = download_youtube_video(video_url)
                    if not video_path:
                        st.error("Failed to download YouTube video")
                    else:
                        _process_and_show_video(
                            video_processor, video_path,
                            progress_bar, status_text, 50
                        )
                except Exception as e:
                    st.error(f"Error processing YouTube video: {str(e)}")
                finally:
                    if video_path:
                        _remove_if_exists(video_path)
                    status_text.empty()
                    progress_bar.empty()


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ opencv-python>=4.8.0
2
+ numpy>=1.24.3
3
+ pillow>=9.5.0
4
+ requests>=2.31.0
5
+ streamlit>=1.24.0
6
+ ultralytics>=8.0.0
7
+ torch>=2.0.0
8
+ torchvision>=0.15.0
9
+ python-dotenv>=1.0.0
10
+ yt-dlp>=2023.3.4
11
+ python-multipart>=0.0.6
12
+ ffmpeg-python>=0.2.0