"""Gradio app that builds an activity journal from an uploaded video using YOLOv10 object detection."""

import gradio as gr
from ultralytics import YOLOv10
import cv2
import torch
import os
import spaces


# Run inference on the GPU when CUDA is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Load the pretrained 'jameslahm/yolov10x' weights once at import time and move the model to the selected device
model = YOLOv10.from_pretrained('jameslahm/yolov10x').to(device)

# Define activity categories based on detected objects.
# Maps an activity name to the object labels that indicate it; an activity is
# reported when at least one of its labels appears among the detections.
# A label may appear under several activities ("bicycle" is listed for both
# "Exercise" and "Outdoors"), in which case both activities are reported.
# NOTE(review): labels are compared against the model's class names by exact
# string match — confirm each label here actually exists in `model.names`.
activity_categories = {
    "Working": ["laptop", "computer", "keyboard", "office chair"],
    "Meal Time": ["fork", "spoon", "plate", "food"],
    "Exercise": ["dumbbell", "bicycle", "yoga mat", "treadmill"],
    "Outdoors": ["car", "tree", "bicycle", "road"],
    # Add more categories and objects as needed
}

# Function to map detected objects to categorized activities
def categorize_activity(detected_objects, categories=None):
    """Return the activities suggested by a collection of detected object labels.

    Args:
        detected_objects: Labels detected in one frame (e.g. a list of class
            names). Membership is tested with ``in``.
        categories: Optional mapping of activity name to the labels that
            indicate it. Defaults to the module-level ``activity_categories``.

    Returns:
        A dict with one key per matching activity. Each value is a one-element
        list holding ``detected_objects`` itself (the full detection list, not
        only the labels that matched), which is the shape callers index with
        ``objects[0]``.
    """
    if categories is None:
        categories = activity_categories

    # Each activity is visited exactly once, so the result can be built
    # directly without checking whether the key already exists.
    return {
        activity: [detected_objects]
        for activity, trigger_objects in categories.items()
        if any(obj in detected_objects for obj in trigger_objects)
    }

# Helper that decides whether an off-interval frame has changed enough to be worth processing
def _frames_differ(previous_frame, current_frame, threshold=30.0):
    """Return True when two BGR frames differ noticeably.

    Both frames are converted to grayscale and compared by their mean absolute
    pixel difference (0-255 scale). Frames of different shape always count as
    different. The default threshold is a heuristic and may need tuning.
    """
    if previous_frame.shape != current_frame.shape:
        return True
    previous_gray = cv2.cvtColor(previous_frame, cv2.COLOR_BGR2GRAY)
    current_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
    return float(cv2.absdiff(previous_gray, current_gray).mean()) > threshold


# Function to process the video, detect objects, and generate a categorized journal with images
@spaces.GPU
def generate_journal_with_images(video_path, frame_interval=30):
    """Run object detection over a video and build a categorized journal.

    A frame is processed when its index is a multiple of ``frame_interval`` or
    when it differs noticeably from the last processed frame. Each processed
    frame is annotated and written to ``detected_frames/frame_<index>.jpg``.

    Args:
        video_path: Path of the video file to read.
        frame_interval: Process at least every N-th frame; must be >= 1.

    Returns:
        A flat list in which every activity contributes a heading string
        ``"**<activity>:**"`` followed by ``(entry_text, image_path)`` tuples
        for the frames in which that activity was detected.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be a positive integer")

    output_folder = "detected_frames"
    os.makedirs(output_folder, exist_ok=True)  # Create folder to store images

    journal_entries = {}
    frame_count = 0
    last_processed_frame = None

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Process every Nth frame, or any frame that changed noticeably
            # since the last processed one.
            should_process = frame_count % frame_interval == 0 or (
                last_processed_frame is not None
                and _frames_differ(last_processed_frame, frame)
            )
            if should_process:
                # OpenCV frames are BGR, which is the channel order Ultralytics
                # expects for numpy inputs, so the frame is passed unchanged.
                results = model.predict(source=frame, device=device)

                # plot() draws boxes/labels on the input image and returns it
                # in the same (BGR) order, ready for cv2.imwrite.
                annotated_frame = results[0].plot()
                frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
                cv2.imwrite(frame_filename, annotated_frame)

                # Map class indices of the detected boxes to class names
                detected_objects = [model.names[int(box.cls)] for box in results[0].boxes]

                # Current position in the video, converted from ms to seconds
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

                # Record one journal entry per activity suggested by this frame
                for activity, objects in categorize_activity(detected_objects).items():
                    journal_entries.setdefault(activity, []).append(
                        (f"At {timestamp:.2f} seconds: {', '.join(objects[0])}", frame_filename)
                    )

                last_processed_frame = frame

            frame_count += 1
    finally:
        # Always free the capture handle, even if inference or saving fails
        cap.release()

    # Create a formatted journal output: heading string, then its entries
    formatted_journal = []
    for activity, entries in journal_entries.items():
        formatted_journal.append(f"**{activity}:**")
        formatted_journal.extend(entries)

    return formatted_journal

# Gradio callback: generate the journal for an uploaded video and convert it to gallery items
def display_journal_with_images(video):
    """Build the gallery content for an uploaded video.

    Args:
        video: Path of the uploaded video as provided by the video input;
            a falsy value (nothing uploaded) yields an empty gallery.

    Returns:
        A list of ``(image_path, caption)`` tuples, the item order a Gradio
        gallery expects. Captions are prefixed with the activity heading the
        entry was listed under.
    """
    if not video:
        return []

    journal_with_images = generate_journal_with_images(video)

    display_items = []
    current_activity = ""
    for item in journal_with_images:
        # The journal interleaves "**Activity:**" heading strings with
        # (entry, image_path) tuples; headings cannot be unpacked as pairs.
        if isinstance(item, str):
            current_activity = item.strip("*").rstrip(":")
            continue

        entry, image_path = item
        caption = f"{current_activity}: {entry}" if current_activity else entry
        display_items.append((image_path, caption))

    return display_items

# Build the UI: a video upload, a gallery for the results, and a button that triggers processing
with gr.Blocks() as iface:
    video_input = gr.Video(label="Upload Video", height=300)
    output_gallery = gr.Gallery(label="Generated Daily Journal with Images")
    run_button = gr.Button("Generate Journal")
    
    # Clicking the button passes the uploaded video to display_journal_with_images
    # and sends its return value to the gallery
    run_button.click(fn=display_journal_with_images, inputs=video_input, outputs=output_gallery)

# Start the app; this runs whenever the module is executed or imported (no __main__ guard)
iface.launch()