"""Gradio app that turns an uploaded video into a categorized activity journal.

Each frame of the video is run through a YOLOv10 detector; the detected
object labels are mapped to coarse activities (see ``activity_categories``)
and reported with the timestamp at which they were seen.
"""

import gradio as gr
from ultralytics import YOLOv10
import cv2
import spaces

# Load YOLOv10 model
model = YOLOv10.from_pretrained('jameslahm/yolov10x')

# Define object categories to classify activities.
# NOTE(review): only labels that the loaded model can actually emit will ever
# match; verify these strings against the model's class names.
activity_categories = {
    "Working": ["laptop", "computer", "keyboard", "office chair"],
    "Meal Time": ["fork", "spoon", "plate", "food"],
    "Exercise": ["dumbbell", "bicycle", "yoga mat", "treadmill"],
    "Outdoors": ["car", "tree", "bicycle", "road"],
    # Add more categories and associated objects as needed
}


def categorize_activity(detected_objects):
    """Map detected object labels to per-activity object counts.

    Args:
        detected_objects: Iterable of label strings, one entry per detection
            (duplicates allowed).

    Returns:
        Dict mapping each activity name to the number of detections whose
        label belongs to that activity. Activities with no matching
        detection are omitted.
    """
    detections = list(detected_objects)
    activity_summary = {}
    for activity, objects in activity_categories.items():
        related = set(objects)
        # Count every matching detection so the journal's "{count} objects"
        # wording is accurate (previously this was always 1).
        count = sum(1 for label in detections if label in related)
        if count:
            activity_summary[activity] = count
    return activity_summary


def _detected_labels(results):
    """Return the class-name label of every box in a list of YOLO results."""
    labels = []
    for result in results:
        boxes = result.boxes
        if boxes is None:
            continue
        # ``boxes.cls`` holds class ids; ``result.names`` maps id -> label.
        names = result.names
        labels.extend(names[int(class_id)] for class_id in boxes.cls.tolist())
    return labels


# Function to process the video and generate the journal
@spaces.GPU
def generate_journal(video):
    """Build a text journal of activities detected in a video.

    Args:
        video: Path of the uploaded video file as supplied by the Gradio
            ``Video`` input; may be ``None`` when nothing was uploaded.

    Returns:
        A newline-separated string with one ``**Activity:**`` header per
        detected activity followed by its timestamped entries, or a short
        explanatory message when the video is missing or cannot be opened.
    """
    if not video:
        return "No video provided."

    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        cap.release()
        return "Could not open the uploaded video."

    journal_entries = {}
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Make predictions using YOLOv10
            results = model.predict(source=frame)
            detected_objects = _detected_labels(results)

            # Get current timestamp in the video (convert ms to seconds)
            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

            # Categorize the detected objects into activities
            activity_summary = categorize_activity(detected_objects)

            # Store the activities with their timestamp
            for activity, count in activity_summary.items():
                journal_entries.setdefault(activity, []).append(
                    f"At {timestamp:.2f} seconds: {count} objects related to {activity}"
                )
    finally:
        # Always free the capture, even if prediction raises mid-video.
        cap.release()

    # Create a formatted journal
    formatted_journal = []
    for activity, entries in journal_entries.items():
        formatted_journal.append(f"**{activity}:**")
        formatted_journal.extend(entries)

    return "\n".join(formatted_journal)


# Gradio interface for uploading video and generating journal
iface = gr.Interface(
    fn=generate_journal,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Textbox(label="Generated Daily Journal"),
    title="AI-Powered Daily Journal"
)

iface.launch()