DailySnap / app.py
yasserrmd's picture
Update app.py
6fb0ffb verified
raw
history blame
4.6 kB
import gradio as gr
from ultralytics import YOLOv10
from skimage.metrics import structural_similarity as ssim
import cv2
import torch
import os
import spaces
# Pick the inference device: CUDA when PyTorch reports it available, else CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the pretrained YOLOv10 checkpoint and move it to the chosen device.
model = YOLOv10.from_pretrained('jameslahm/yolov10x')
model = model.to(device)
# Activity label -> object labels that signal that activity.
# A label may appear under several activities ("bicycle" is listed for both
# "Exercise" and "Outdoors"), so one frame can match more than one activity.
# NOTE(review): matching is by exact label string; entries such as
# "office chair" or "yoga mat" may not be class names the detector emits --
# verify against model.names.
activity_categories = {
    "Working": [
        "laptop",
        "computer",
        "keyboard",
        "office chair",
    ],
    "Meal Time": [
        "fork",
        "spoon",
        "plate",
        "food",
    ],
    "Exercise": [
        "dumbbell",
        "bicycle",
        "yoga mat",
        "treadmill",
    ],
    "Outdoors": [
        "car",
        "tree",
        "bicycle",
        "road",
    ],
    # Add more categories and objects as needed
}
# Function to map detected objects to categorized activities
def categorize_activity(detected_objects, categories=None):
    """Return the activities whose trigger objects appear among the detections.

    Args:
        detected_objects: Collection of detected object labels for one frame.
        categories: Optional mapping of activity name -> list of object
            labels. Defaults to the module-level ``activity_categories``.

    Returns:
        A dict keyed by every matching activity, in the iteration order of
        ``categories``. Each value is a one-element list holding
        ``detected_objects`` itself (the full detection list, not only the
        labels that matched), which is the shape existing callers index
        with ``[0]``.
    """
    if categories is None:
        categories = activity_categories

    # Each activity is visited exactly once, so a comprehension is enough;
    # no "key already present" bookkeeping is needed.
    return {
        activity: [detected_objects]
        for activity, trigger_objects in categories.items()
        if any(label in detected_objects for label in trigger_objects)
    }
# Function to compare frames using SSIM to avoid repeated frames
def is_frame_different(frame1, frame2, threshold=0.9):
    """Tell whether two frames differ enough to be treated as distinct.

    Both frames are converted from BGR to grayscale and compared with
    structural similarity (SSIM).

    Args:
        frame1: First frame, converted with ``cv2.COLOR_BGR2GRAY``.
        frame2: Second frame, converted the same way.
        threshold: SSIM score below which the frames count as different.

    Returns:
        True when the SSIM score is strictly below ``threshold``.
    """
    grays = [cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (frame1, frame2)]
    # full=True also yields the per-pixel similarity map; only the score is used.
    similarity = ssim(grays[0], grays[1], full=True)[0]
    return similarity < threshold
# Function to process the video, detect objects, and generate a categorized journal with images
@spaces.GPU
def generate_journal_with_images(video_path, frame_interval=30):
    """Run object detection over a video and build a journal with images.

    A frame is analysed when its index is a multiple of ``frame_interval`` or
    when ``is_frame_different`` reports it differs from the last analysed
    frame. Analysed frames with at least one detection are annotated and
    saved as ``detected_frames/frame_<index>.jpg``, and one journal line is
    appended for every activity returned by ``categorize_activity``.

    Args:
        video_path: Video location handed to ``cv2.VideoCapture``.
        frame_interval: Analyse every N-th frame unconditionally; must be
            greater than zero.

    Returns:
        ``(journal_entries, image_paths)``: the journal lines as strings and
        the paths of the saved annotated frames, both in video order.

    Raises:
        ValueError: If ``frame_interval`` is not positive (it is used as a
            modulo divisor).
    """
    if frame_interval <= 0:
        raise ValueError(f"frame_interval must be positive, got {frame_interval!r}")

    output_folder = "detected_frames"
    os.makedirs(output_folder, exist_ok=True)  # Create folder to store images

    journal_entries = []
    image_paths = []
    last_processed_frame = None
    frame_count = 0

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Process every Nth frame or if the current frame is different
            # from the last processed frame
            should_process = frame_count % frame_interval == 0 or (
                last_processed_frame is not None
                and is_frame_different(last_processed_frame, frame)
            )
            if should_process:
                # OpenCV frames are BGR, which is the channel order Ultralytics
                # expects for NumPy inputs, so the frame is passed unconverted.
                results = model.predict(source=frame, device=device)

                # Extract detected objects
                detected_objects = [model.names[int(box.cls)] for box in results[0].boxes]

                # Only record frames where objects are detected
                if detected_objects:
                    # plot() draws boxes and labels on the (BGR) input image,
                    # so the result can be written by OpenCV as-is.
                    annotated_frame = results[0].plot()
                    frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
                    cv2.imwrite(frame_filename, annotated_frame)
                    image_paths.append(frame_filename)

                    # Current position in the video, converted from ms to seconds
                    timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

                    # Categorize the detected objects into activities
                    activity_summary = categorize_activity(detected_objects)

                    # One entry per matching activity; objects[0] is the full
                    # list of labels detected in this frame.
                    # NOTE(review): the activity name is not part of the entry
                    # text, so a frame matching several activities yields
                    # identical lines -- confirm this is intended.
                    for objects in activity_summary.values():
                        journal_entries.append(
                            f"At {timestamp:.2f} seconds: {', '.join(objects[0])}"
                        )

                last_processed_frame = frame  # Update the last processed frame

            frame_count += 1
    finally:
        # Release the capture handle even if detection or file writing fails.
        cap.release()

    return journal_entries, image_paths
def display_journal_with_images(video):
    """Button callback: produce the journal text and gallery images for a video.

    Args:
        video: Value of the video input, forwarded as ``video_path`` to
            ``generate_journal_with_images``. May be ``None`` or empty when
            no video was supplied.

    Returns:
        ``(journal_text, image_paths)``: the journal entries joined with
        newlines, and the list of annotated frame image paths. Both are
        empty when no video was supplied.
    """
    if not video:
        # Nothing to analyse: return empty outputs rather than handing a
        # missing path to the video reader.
        return "", []

    journal_entries, image_paths = generate_journal_with_images(video, frame_interval=30)
    journal_text = "\n".join(journal_entries)
    return journal_text, image_paths
# Gradio UI: a video input, a text box for the journal, a gallery for the
# annotated frames, and a button that runs display_journal_with_images.
with gr.Blocks() as iface:
    video_input = gr.Video(label="Upload Video", height=300)
    journal_output = gr.Textbox(label="Generated Daily Journal", lines=10)
    image_gallery = gr.Gallery(label="Annotated Frames")
    run_button = gr.Button("Generate Journal")
    run_button.click(
        fn=display_journal_with_images,
        inputs=video_input,
        outputs=[journal_output, image_gallery],
    )

# Launch the app exactly once; the repeated launch() call was redundant.
iface.launch()