Spaces:

yasserrmd
/

DailySnap

Running on Zero

App Files Files Community

DailySnap / app.py

yasserrmd

Update app.py

01e83c8 verified 5 days ago

raw

history blame

4.04 kB

	import gradio as gr
	from ultralytics import YOLOv10
	import cv2
	import torch
	import os
	import spaces


	# Load the pretrained YOLOv10 weights 'jameslahm/yolov10x' once at import time;
	# generate_journal_with_images reuses this shared instance for every frame.
	model = YOLOv10.from_pretrained('jameslahm/yolov10x')

	# Lookup table: activity name -> object labels whose presence in a frame
	# signals that activity. Extend it with further activities/labels as needed.
	# NOTE(review): labels are compared verbatim against the detector's class
	# names, so a label the model never emits can never match -- confirm each
	# entry against model.names.
	activity_categories = {
	    "Working": [
	        "laptop",
	        "computer",
	        "keyboard",
	        "office chair",
	    ],
	    "Meal Time": [
	        "fork",
	        "spoon",
	        "plate",
	        "food",
	    ],
	    "Exercise": [
	        "dumbbell",
	        "bicycle",
	        "yoga mat",
	        "treadmill",
	    ],
	    "Outdoors": [
	        "car",
	        "tree",
	        "bicycle",
	        "road",
	    ],
	}

	# Translate the object labels seen in one frame into the activities they suggest
	def categorize_activity(detected_objects):
	    """Return the activities indicated by ``detected_objects``.

	    An activity from ``activity_categories`` is selected when at least one of
	    its labels occurs in ``detected_objects``.

	    Args:
	        detected_objects: Collection of object label strings for one frame.

	    Returns:
	        dict mapping each selected activity name to a one-element list whose
	        only item is ``detected_objects`` itself (the full collection, not
	        just the labels that matched).
	    """
	    return {
	        activity: [detected_objects]
	        for activity, keywords in activity_categories.items()
	        if any(keyword in detected_objects for keyword in keywords)
	    }

	# Process a video frame by frame: detect objects, save annotated frames,
	# and build a journal of activities grouped by category
	@spaces.GPU
	def generate_journal_with_images(video_path):
	    """Run YOLOv10 on every frame of a video and build a categorized journal.

	    Each frame is passed to the detector, written to ``detected_frames/`` with
	    its bounding boxes drawn, and its detected labels are mapped to activities
	    via ``categorize_activity``.

	    Args:
	        video_path: Path of the video file to open with ``cv2.VideoCapture``.

	    Returns:
	        A flat list. For every activity that was seen it contains a heading
	        string ``"<activity>:"`` followed by one ``(text, image_path)`` tuple
	        per matching frame, where ``text`` is
	        ``"At <t> seconds: <labels>"`` and ``image_path`` is the saved frame.
	    """
	    # The original referenced an undefined ``device`` name; the model is moved
	    # to CUDA, so predictions are requested on the same device.
	    device = "cuda"
	    model.to(device)

	    output_folder = "detected_frames"
	    os.makedirs(output_folder, exist_ok=True)  # Create folder to store images

	    journal_entries = {}
	    frame_count = 0
	    cap = cv2.VideoCapture(video_path)
	    try:
	        while cap.isOpened():
	            ret, frame = cap.read()
	            if not ret:
	                break

	            # predict() returns a list with one Results object per input image.
	            # OpenCV frames are BGR, which is the channel order Ultralytics
	            # expects for numpy inputs, so no colour conversion is done.
	            result = model.predict(source=frame, device=device)[0]

	            # plot() returns a copy of the frame with the boxes drawn (BGR),
	            # ready for cv2.imwrite.
	            frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
	            cv2.imwrite(frame_filename, result.plot())

	            # Map predicted class indices to class names
	            detected_objects = [
	                model.names[int(class_id)] for class_id in result.boxes.cls.tolist()
	            ]

	            # Position in the video as reported by OpenCV, converted ms -> s
	            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

	            # Record one entry per activity suggested by this frame
	            for activity, objects in categorize_activity(detected_objects).items():
	                journal_entries.setdefault(activity, []).append(
	                    (f"At {timestamp:.2f} seconds: {', '.join(objects[0])}", frame_filename)
	                )

	            frame_count += 1
	    finally:
	        # Release the capture handle even if detection or file writing fails
	        cap.release()

	    # Flatten to: heading string, then that activity's (text, image) tuples
	    formatted_journal = []
	    for activity, entries in journal_entries.items():
	        formatted_journal.append(f"{activity}:")
	        formatted_journal.extend(entries)

	    return formatted_journal

	# Gradio callback: turn an uploaded video into gallery items (image + caption)
	def display_journal_with_images(video):
	    """Generate the journal for ``video`` and convert it to gallery items.

	    ``generate_journal_with_images`` returns a flat list that mixes activity
	    heading strings with ``(text, image_path)`` tuples. Headings cannot be
	    unpacked as pairs, so each one is remembered and prefixed to the captions
	    of the entries that follow it.

	    Args:
	        video: Path of the uploaded video as supplied by the video input.

	    Returns:
	        list of ``(image_path, caption)`` tuples, image first, which is the
	        order a Gradio gallery expects for captioned images.
	    """
	    journal_with_images = generate_journal_with_images(video)

	    display_items = []
	    current_heading = ""
	    for item in journal_with_images:
	        if isinstance(item, str):
	            # Activity heading ("Working:"): applies to the entries after it
	            current_heading = item
	            continue

	        entry, image_path = item
	        caption = f"{current_heading} {entry}" if current_heading else entry
	        display_items.append((image_path, caption))

	    return display_items

	# Build the Gradio UI: a video upload, a gallery for the annotated frames,
	# and a button that runs the journal generation
	with gr.Blocks() as iface:
	    video_input = gr.Video(label="Upload Video")
	    # Component.style() is no longer available in current Gradio releases;
	    # layout options are passed to the constructor instead.
	    output_gallery = gr.Gallery(
	        label="Generated Daily Journal with Images",
	        columns=2,
	        height="auto",
	    )
	    run_button = gr.Button("Generate Journal")

	    run_button.click(fn=display_journal_with_images, inputs=video_input, outputs=output_gallery)

	iface.launch()