# File size: 4,858 Bytes
# (Web-viewer residue removed: per-line commit hashes and the line-number gutter 1-122.)
import gradio as gr
from ultralytics import YOLOv10
import cv2
import torch
import os
import spaces
# Pick the inference device once at import time: CUDA when torch reports it
# as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the pretrained YOLOv10-X checkpoint and move it to the chosen device.
model = YOLOv10.from_pretrained('jameslahm/yolov10x').to(device)
# Activity name -> object labels whose detection signals that activity.
# A label may appear under several activities (e.g. "bicycle").
# NOTE(review): some labels here (e.g. "computer", "food", "tree", "road")
# may not be class names the loaded checkpoint can emit -- confirm against
# `model.names`.
activity_categories = {
    "Working": ["laptop", "computer", "keyboard", "office chair"],
    "Meal Time": ["fork", "spoon", "plate", "food"],
    "Exercise": ["dumbbell", "bicycle", "yoga mat", "treadmill"],
    "Outdoors": ["car", "tree", "bicycle", "road"],
    # Extend with further activities and their trigger objects as needed.
}
# Map a frame's detected object labels onto the known activity categories.
def categorize_activity(detected_objects):
    """Return the activities triggered by ``detected_objects``.

    An activity from ``activity_categories`` is included when at least one of
    its trigger labels is present in ``detected_objects``.

    Args:
        detected_objects: Collection of detected label strings for one frame.

    Returns:
        A dict mapping each matching activity name to a one-element list
        holding ``detected_objects`` itself (the full detection list, not
        only the labels that matched).
    """
    return {
        label: [detected_objects]
        for label, triggers in activity_categories.items()
        if any(trigger in detected_objects for trigger in triggers)
    }
# Mean structural similarity (SSIM) of two equally sized grayscale images.
def _mean_ssim(gray_a, gray_b):
    """Return the mean SSIM index of two float grayscale images.

    Uses the usual SSIM formulation for 8-bit data (K1=0.01, K2=0.03, L=255)
    with an 11x11 Gaussian window of sigma 1.5, built only on ``cv2`` so no
    extra dependency is required.
    """
    c1 = (0.01 * 255) ** 2
    c2 = (0.03 * 255) ** 2

    def blur(image):
        return cv2.GaussianBlur(image, (11, 11), 1.5)

    mu_a = blur(gray_a)
    mu_b = blur(gray_b)
    mu_a_sq = mu_a * mu_a
    mu_b_sq = mu_b * mu_b
    mu_ab = mu_a * mu_b

    # Local variances / covariance: E[x*y] - E[x]*E[y] under the same window.
    var_a = blur(gray_a * gray_a) - mu_a_sq
    var_b = blur(gray_b * gray_b) - mu_b_sq
    cov_ab = blur(gray_a * gray_b) - mu_ab

    numerator = (2 * mu_ab + c1) * (2 * cov_ab + c2)
    denominator = (mu_a_sq + mu_b_sq + c1) * (var_a + var_b + c2)
    return float((numerator / denominator).mean())


# Function to compare frames using SSIM to avoid repeated frames
def is_frame_different(frame1, frame2, threshold=0.9):
    """Tell whether two BGR frames differ enough to be worth re-processing.

    The original body called ``ssim`` without importing or defining it, which
    raised ``NameError`` on first use; the similarity is now computed by
    ``_mean_ssim``.

    Args:
        frame1: First frame (BGR image array, as read by ``cv2``).
        frame2: Second frame (BGR image array).
        threshold: Similarity below which the frames count as different.

    Returns:
        ``True`` when the mean SSIM of the grayscale frames is below
        ``threshold``, or when the frames do not have the same shape.
    """
    # Frames of different geometry cannot be compared pixel-wise; treat them
    # as different instead of failing inside the similarity computation.
    if frame1.shape != frame2.shape:
        return True

    gray_frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY).astype("float64")
    gray_frame2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY).astype("float64")
    return _mean_ssim(gray_frame1, gray_frame2) < threshold
# Function to process the video, detect objects, and generate a categorized journal with images
@spaces.GPU
def generate_journal_with_images(video_path, frame_interval=30):
    """Run object detection over a video and build an activity journal.

    A frame is processed when its index is a multiple of ``frame_interval``
    or when ``is_frame_different`` reports it differs from the last processed
    frame. Each processed frame is annotated and written to
    ``detected_frames/frame_<index>.jpg``; its detected labels are grouped
    into activities via ``categorize_activity``.

    Args:
        video_path: Path of the video file to read.
        frame_interval: Process every N-th frame unconditionally. Must be a
            positive integer.

    Returns:
        A flat list in activity order. For every activity it contains the
        heading string ``"**<activity>:**"`` followed by one
        ``(caption, image_path)`` tuple per journal entry, so callers must
        handle both strings and tuples.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be a positive integer")

    output_folder = "detected_frames"
    os.makedirs(output_folder, exist_ok=True)  # Create folder to store images

    journal_entries = {}
    frame_count = 0
    last_processed_frame = None

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Process every Nth frame, or any frame that differs from the
            # last processed one.
            should_process = frame_count % frame_interval == 0 or (
                last_processed_frame is not None
                and is_frame_different(last_processed_frame, frame)
            )
            if should_process:
                # Ultralytics treats numpy input as BGR (OpenCV order), so the
                # frame is passed as read; converting to RGB first would feed
                # the model swapped colour channels.
                results = model.predict(source=frame, device=device)
                detections = results[0]

                # plot() draws on the input image, so the result is BGR and
                # can be written by cv2 directly.
                annotated_frame = detections.plot()
                frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
                cv2.imwrite(frame_filename, annotated_frame)

                # Map class indices to class names.
                detected_objects = [model.names[int(box.cls)] for box in detections.boxes]

                # Capture position in seconds (the property is in milliseconds).
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

                activity_summary = categorize_activity(detected_objects)
                for activity, objects in activity_summary.items():
                    caption = f"At {timestamp:.2f} seconds: {', '.join(objects[0])}"
                    journal_entries.setdefault(activity, []).append((caption, frame_filename))

                last_processed_frame = frame  # Update the last processed frame
            frame_count += 1
    finally:
        # Always free the capture handle, even if detection or saving fails.
        cap.release()

    # Flatten into heading strings followed by their (caption, image) entries.
    formatted_journal = []
    for activity, entries in journal_entries.items():
        formatted_journal.append(f"**{activity}:**")
        formatted_journal.extend(entries)
    return formatted_journal
# Gradio callback: turn an uploaded video into gallery items for the journal.
def display_journal_with_images(video):
    """Build the gallery content for an uploaded video.

    The journal from ``generate_journal_with_images`` interleaves heading
    strings (``"**<activity>:**"``) with ``(caption, image_path)`` tuples.
    Unpacking every item as a pair, as the previous code did, fails on the
    heading strings, and a gallery item must be ``(image, caption)`` rather
    than ``(caption, image)``. Headings are therefore folded into the caption
    of the entries that follow them and each pair is reordered.

    Args:
        video: Path of the uploaded video, or a falsy value when nothing was
            uploaded.

    Returns:
        A list of ``(image_path, caption)`` tuples; empty when no video was
        provided.
    """
    if not video:
        return []

    display_items = []
    current_activity = ""
    for item in generate_journal_with_images(video):
        if isinstance(item, str):
            # Heading such as "**Working:**" -> "Working"; gallery captions
            # are plain text, so the markdown markers are dropped.
            current_activity = item.strip("*").rstrip(":")
            continue
        entry, image_path = item
        caption = f"{current_activity}: {entry}" if current_activity else entry
        display_items.append((image_path, caption))
    return display_items
# Assemble the UI: a video upload, a results gallery and a trigger button.
with gr.Blocks() as iface:
    video_input = gr.Video(label="Upload Video", height=300)
    output_gallery = gr.Gallery(label="Generated Daily Journal with Images")
    run_button = gr.Button("Generate Journal")

    # Clicking the button feeds the uploaded video to the journal generator
    # and shows the result in the gallery.
    run_button.click(
        fn=display_journal_with_images,
        inputs=video_input,
        outputs=output_gallery,
    )

# Start the Gradio app.
iface.launch()
|