Abhilashvj committed on
Commit
b5642bf
1 Parent(s): bdcf215

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -75
app.py CHANGED
@@ -10,9 +10,8 @@ import cv2
10
  from insightface.app import FaceAnalysis
11
  from moviepy.editor import VideoFileClip
12
  from sklearn.cluster import DBSCAN
13
- from collections import defaultdict
14
- import plotly.graph_objs as go
15
  from sklearn.decomposition import PCA
 
16
 
17
  # Load models
18
  @st.cache_resource
@@ -27,40 +26,71 @@ text_model, image_model, face_app = load_models()
27
 
28
  # Load data
29
  @st.cache_data
30
- def load_data(video_id):
31
- with open(f"{video_id}_summary.json", "r") as f:
32
  summary = json.load(f)
33
- with open(f"{video_id}_transcription.json", "r") as f:
34
  transcription = json.load(f)
35
- with open(f"{video_id}_text_metadata.json", "r") as f:
36
  text_metadata = json.load(f)
37
- with open(f"{video_id}_image_metadata.json", "r") as f:
38
  image_metadata = json.load(f)
39
- with open(f"{video_id}_face_metadata.json", "r") as f:
40
  face_metadata = json.load(f)
41
- return summary, transcription, text_metadata, image_metadata, face_metadata
 
42
 
43
  video_id = "IMFUOexuEXw"
 
44
  video_path = "avengers_interview.mp4"
45
- summary, transcription, text_metadata, image_metadata, face_metadata = load_data(video_id)
46
 
47
  # Load FAISS indexes
48
  @st.cache_resource
49
- def load_indexes(video_id):
50
- text_index = faiss.read_index(f"{video_id}_text_index.faiss")
51
- image_index = faiss.read_index(f"{video_id}_image_index.faiss")
52
- face_index = faiss.read_index(f"{video_id}_face_index.faiss")
53
- return text_index, image_index, face_index
54
 
55
- text_index, image_index, face_index = load_indexes(video_id)
56
 
57
- # Face clustering function
58
- def cluster_faces(face_embeddings, eps=0.5, min_samples=3):
 
 
59
  clustering = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine').fit(face_embeddings)
60
- return clustering.labels_
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- # Face clustering visualization
63
- def plot_face_clusters(face_embeddings, labels, face_metadata):
 
 
 
64
  pca = PCA(n_components=3)
65
  embeddings_3d = pca.fit_transform(face_embeddings)
66
 
@@ -70,12 +100,22 @@ def plot_face_clusters(face_embeddings, labels, face_metadata):
70
 
71
  traces = []
72
  for label, color in zip(unique_labels, colors):
 
 
73
  cluster_points = embeddings_3d[labels == label]
 
74
 
75
- hover_text = []
76
- for i, point in enumerate(cluster_points):
77
- face = face_metadata[np.where(labels == label)[0][i]]
78
- hover_text.append(f"Cluster {label}<br>Time: {face['start']:.2f}s")
 
 
 
 
 
 
 
79
 
80
  trace = go.Scatter3d(
81
  x=cluster_points[:, 0],
@@ -83,11 +123,7 @@ def plot_face_clusters(face_embeddings, labels, face_metadata):
83
  z=cluster_points[:, 2],
84
  mode='markers',
85
  name=f'Cluster {label}',
86
- marker=dict(
87
- size=5,
88
- color=color,
89
- opacity=0.8
90
- ),
91
  text=hover_text,
92
  hoverinfo='text'
93
  )
@@ -95,11 +131,7 @@ def plot_face_clusters(face_embeddings, labels, face_metadata):
95
 
96
  layout = go.Layout(
97
  title='Face Clusters Visualization',
98
- scene=dict(
99
- xaxis_title='PCA Component 1',
100
- yaxis_title='PCA Component 2',
101
- zaxis_title='PCA Component 3'
102
- ),
103
  margin=dict(r=0, b=0, l=0, t=40)
104
  )
105
 
@@ -124,9 +156,9 @@ def combined_search(query, text_index, image_index, text_metadata, image_metadat
124
  combined_results = sorted(text_results + image_results, key=lambda x: x['distance'])
125
  return combined_results[:n_results]
126
 
127
- def face_search(face_embedding, index, metadata, n_results=5):
128
- D, I = index.search(np.array(face_embedding).reshape(1, -1), n_results)
129
- results = [metadata[i] for i in I[0]]
130
  return results, D[0]
131
 
132
  def detect_and_embed_face(image, face_app):
@@ -156,43 +188,31 @@ st.sidebar.text_area("Full Transcript", transcript_text, height=300)
156
 
157
  # Main content
158
  st.header("Video Summary")
159
- col1, col2 = st.columns(2)
160
-
161
- with col1:
162
- st.subheader("Prominent Faces")
163
- for face in summary['prominent_faces']:
164
- st.write(f"Face ID: {face['id']}, Appearances: {face['appearances']}")
165
- if 'thumbnail' in face:
166
- image = Image.open(io.BytesIO(base64.b64decode(face['thumbnail'])))
167
- st.image(image, caption=f"Face ID: {face['id']}", width=100)
168
-
169
- with col2:
170
- st.subheader("Themes")
171
- for theme in summary['themes']:
172
- st.write(f"Theme ID: {theme['id']}, Keywords: {', '.join(theme['keywords'])}")
173
 
174
- # Face Clustering
175
- st.header("Face Clustering")
176
- face_embeddings = face_index.reconstruct_n(0, face_index.ntotal)
177
- face_labels = cluster_faces(face_embeddings)
 
 
 
 
 
 
 
 
 
178
 
179
- # Update face clusters in summary
180
- face_clusters = defaultdict(list)
181
- for i, label in enumerate(face_labels):
182
- face_clusters[label].append(face_metadata[i])
183
-
184
- summary['face_clusters'] = [
185
- {
186
- 'cluster_id': f'cluster_{label}',
187
- 'faces': cluster
188
- } for label, cluster in face_clusters.items()
189
- ]
190
-
191
- # Visualize face clusters
192
  st.subheader("Face Cluster Visualization")
193
- fig = plot_face_clusters(face_embeddings, face_labels, face_metadata)
194
  st.plotly_chart(fig)
195
 
 
 
 
 
 
196
  # Search functionality
197
  st.header("Search")
198
 
@@ -231,13 +251,13 @@ elif search_type == "Face":
231
  face_search_type = st.radio("Choose face search method", ["Select from clusters", "Upload image"])
232
 
233
  if face_search_type == "Select from clusters":
234
- cluster_id = st.selectbox("Select a face cluster", [f'cluster_{label}' for label in set(face_labels) if label != -1])
235
  if st.button("Search"):
236
- selected_cluster = next(cluster for cluster in summary['face_clusters'] if cluster['cluster_id'] == cluster_id)
237
  st.subheader("Face Cluster Search Results")
238
- for face in selected_cluster['faces']:
239
- st.write(f"Time: {face['start']:.2f}s - {face['end']:.2f}s")
240
- clip_path = create_video_clip(video_path, face['start'], face['end'], f"temp_face_clip_{face['start']}.mp4")
241
  st.video(clip_path)
242
  st.write("---")
243
  else:
 
10
  from insightface.app import FaceAnalysis
11
  from moviepy.editor import VideoFileClip
12
  from sklearn.cluster import DBSCAN
 
 
13
  from sklearn.decomposition import PCA
14
+ import plotly.graph_objs as go
15
 
16
  # Load models
17
  @st.cache_resource
 
26
 
27
  # Load data
28
@st.cache_data
def load_data(video_id, output_dir):
    """Load all precomputed analysis artifacts for one video.

    Reads the five JSON files produced by the analysis pipeline plus the
    FAISS face index; every artifact is named ``<video_id>_<suffix>`` and
    lives inside *output_dir*.

    Args:
        video_id: Identifier used as the artifact filename prefix.
        output_dir: Directory containing the pipeline's output files.

    Returns:
        Tuple of (summary, transcription, text_metadata, image_metadata,
        face_metadata, face_index).

    Raises:
        FileNotFoundError: If any expected artifact file is missing.
    """
    def _read_json(suffix):
        # All JSON artifacts share one naming scheme; load one by its suffix.
        with open(f"{output_dir}/{video_id}_{suffix}.json", "r") as f:
            return json.load(f)

    summary = _read_json("summary")
    transcription = _read_json("transcription")
    text_metadata = _read_json("text_metadata")
    image_metadata = _read_json("image_metadata")
    face_metadata = _read_json("face_metadata")
    # The face index is loaded here (not in load_indexes) because face
    # clustering needs it together with face_metadata right after loading.
    face_index = faiss.read_index(f"{output_dir}/{video_id}_face_index.faiss")
    return summary, transcription, text_metadata, image_metadata, face_metadata, face_index
42
 
43
  video_id = "IMFUOexuEXw"
44
+ output_dir = "video_analysis_output"
45
  video_path = "avengers_interview.mp4"
46
+ summary, transcription, text_metadata, image_metadata, face_metadata, face_index = load_data(video_id, output_dir)
47
 
48
  # Load FAISS indexes
49
@st.cache_resource
def load_indexes(video_id, output_dir):
    """Read the cached FAISS text and image indexes for *video_id*.

    Args:
        video_id: Identifier used as the index filename prefix.
        output_dir: Directory containing the ``.faiss`` index files.

    Returns:
        Tuple of (text_index, image_index) FAISS index objects.
    """
    return (
        faiss.read_index(f"{output_dir}/{video_id}_text_index.faiss"),
        faiss.read_index(f"{output_dir}/{video_id}_image_index.faiss"),
    )
 
54
 
55
+ text_index, image_index = load_indexes(video_id, output_dir)
56
 
57
# Comprehensive face summarization
def create_comprehensive_face_summary(face_index, face_metadata, eps=0.5, min_samples=3):
    """Cluster all face embeddings and summarize each cluster's screen presence.

    Args:
        face_index: FAISS index holding one embedding per detected face.
        face_metadata: Per-face dicts with at least ``'start'`` and ``'end'``
            timestamps, positionally aligned with the embeddings in
            *face_index*.
        eps: DBSCAN neighborhood radius (cosine distance).
        min_samples: Minimum neighborhood size for DBSCAN core points.

    Returns:
        Tuple of (summary, face_embeddings, labels): *summary* is a list of
        per-cluster dicts (DBSCAN noise label -1 is excluded), *face_embeddings*
        is the embedding matrix reconstructed from the index, and *labels* is
        the per-face cluster label array.
    """
    face_embeddings = face_index.reconstruct_n(0, face_index.ntotal)

    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine').fit(face_embeddings)

    # Group embedding positions by their cluster label.
    face_clusters = {}
    for i, label in enumerate(clustering.labels_):
        face_clusters.setdefault(label, []).append(i)

    summary = []
    for label, indices in face_clusters.items():
        if label == -1:
            continue  # Ignore noise points
        cluster_appearances = [face_metadata[i] for i in indices]
        summary.append({
            "cluster_id": f"cluster_{label}",
            "face_count": len(indices),
            "appearances": cluster_appearances,
            "timeline": [
                {"start": app['start'], "end": app['end']}
                for app in cluster_appearances
            ],
            "total_screen_time": sum(app['end'] - app['start'] for app in cluster_appearances),
            "first_appearance": min(app['start'] for app in cluster_appearances),
            "last_appearance": max(app['end'] for app in cluster_appearances),
        })

    return summary, face_embeddings, clustering.labels_
88
 
89
+ # Create comprehensive face summary
90
+ face_summary, face_embeddings, face_labels = create_comprehensive_face_summary(face_index, face_metadata)
91
+
92
+ # Face cluster visualization
93
+ def plot_face_clusters_interactive(face_embeddings, labels, face_summary):
94
  pca = PCA(n_components=3)
95
  embeddings_3d = pca.fit_transform(face_embeddings)
96
 
 
100
 
101
  traces = []
102
  for label, color in zip(unique_labels, colors):
103
+ if label == -1:
104
+ continue # Skip noise points
105
  cluster_points = embeddings_3d[labels == label]
106
+ cluster_info = next((c for c in face_summary if c['cluster_id'] == f'cluster_{label}'), None)
107
 
108
+ if cluster_info:
109
+ hover_text = [
110
+ f"Cluster {label}<br>"
111
+ f"Face count: {cluster_info['face_count']}<br>"
112
+ f"Total screen time: {cluster_info['total_screen_time']:.2f}s<br>"
113
+ f"First appearance: {cluster_info['first_appearance']:.2f}s<br>"
114
+ f"Last appearance: {cluster_info['last_appearance']:.2f}s"
115
+ for _ in cluster_points
116
+ ]
117
+ else:
118
+ hover_text = [f"Cluster {label}" for _ in cluster_points]
119
 
120
  trace = go.Scatter3d(
121
  x=cluster_points[:, 0],
 
123
  z=cluster_points[:, 2],
124
  mode='markers',
125
  name=f'Cluster {label}',
126
+ marker=dict(size=5, color=color, opacity=0.8),
 
 
 
 
127
  text=hover_text,
128
  hoverinfo='text'
129
  )
 
131
 
132
  layout = go.Layout(
133
  title='Face Clusters Visualization',
134
+ scene=dict(xaxis_title='PCA 1', yaxis_title='PCA 2', zaxis_title='PCA 3'),
 
 
 
 
135
  margin=dict(r=0, b=0, l=0, t=40)
136
  )
137
 
 
156
  combined_results = sorted(text_results + image_results, key=lambda x: x['distance'])
157
  return combined_results[:n_results]
158
 
159
def face_search(face_embedding, face_index, face_metadata, n_results=5):
    """Find the faces most similar to *face_embedding* in the FAISS index.

    Args:
        face_embedding: A single face embedding vector.
        face_index: FAISS index to query.
        face_metadata: Per-face metadata, positionally aligned with the index.
        n_results: Number of nearest neighbors to return.

    Returns:
        Tuple (results, distances): metadata dicts of the nearest faces and
        their corresponding distances.
    """
    query = np.array([face_embedding])
    distances, neighbor_ids = face_index.search(query, n_results)
    matches = [face_metadata[idx] for idx in neighbor_ids[0]]
    return matches, distances[0]
163
 
164
  def detect_and_embed_face(image, face_app):
 
188
 
189
  # Main content
190
  st.header("Video Summary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
+ # Face Clusters
193
+ st.subheader("Face Clusters")
194
+ for cluster in face_summary[:5]: # Display first 5 clusters
195
+ st.write(f"Cluster {cluster['cluster_id']}:")
196
+ st.write(f" Face count: {cluster['face_count']}")
197
+ st.write(f" Total screen time: {cluster['total_screen_time']:.2f} seconds")
198
+ st.write(f" First appearance: {cluster['first_appearance']:.2f} seconds")
199
+ st.write(f" Last appearance: {cluster['last_appearance']:.2f} seconds")
200
+ st.write(f" Timeline: {len(cluster['timeline'])} appearances")
201
+ st.write(" First 5 appearances:")
202
+ for app in cluster['timeline'][:5]:
203
+ st.write(f" {app['start']:.2f}s - {app['end']:.2f}s")
204
+ st.write("---")
205
 
206
+ # Face Cluster Visualization
 
 
 
 
 
 
 
 
 
 
 
 
207
  st.subheader("Face Cluster Visualization")
208
+ fig = plot_face_clusters_interactive(face_embeddings, face_labels, face_summary)
209
  st.plotly_chart(fig)
210
 
211
+ # Themes
212
+ st.subheader("Themes")
213
+ for theme in summary['themes']:
214
+ st.write(f"Theme ID: {theme['id']}, Keywords: {', '.join(theme['keywords'])}")
215
+
216
  # Search functionality
217
  st.header("Search")
218
 
 
251
  face_search_type = st.radio("Choose face search method", ["Select from clusters", "Upload image"])
252
 
253
  if face_search_type == "Select from clusters":
254
+ cluster_id = st.selectbox("Select a face cluster", [cluster['cluster_id'] for cluster in face_summary])
255
  if st.button("Search"):
256
+ selected_cluster = next(cluster for cluster in face_summary if cluster['cluster_id'] == cluster_id)
257
  st.subheader("Face Cluster Search Results")
258
+ for appearance in selected_cluster['appearances'][:5]: # Show first 5 appearances
259
+ st.write(f"Time: {appearance['start']:.2f}s - {appearance['end']:.2f}s")
260
+ clip_path = create_video_clip(video_path, appearance['start'], appearance['end'], f"temp_face_clip_{appearance['start']}.mp4")
261
  st.video(clip_path)
262
  st.write("---")
263
  else: