Abhilashvj committed on
Commit
b5642bf
1 Parent(s): bdcf215

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -75
app.py CHANGED
@@ -10,9 +10,8 @@ import cv2
10
  from insightface.app import FaceAnalysis
11
  from moviepy.editor import VideoFileClip
12
  from sklearn.cluster import DBSCAN
13
- from collections import defaultdict
14
- import plotly.graph_objs as go
15
  from sklearn.decomposition import PCA
 
16
 
17
  # Load models
18
  @st.cache_resource
@@ -27,40 +26,71 @@ text_model, image_model, face_app = load_models()
27
 
28
  # Load data
29
  @st.cache_data
30
- def load_data(video_id):
31
- with open(f"{video_id}_summary.json", "r") as f:
32
  summary = json.load(f)
33
- with open(f"{video_id}_transcription.json", "r") as f:
34
  transcription = json.load(f)
35
- with open(f"{video_id}_text_metadata.json", "r") as f:
36
  text_metadata = json.load(f)
37
- with open(f"{video_id}_image_metadata.json", "r") as f:
38
  image_metadata = json.load(f)
39
- with open(f"{video_id}_face_metadata.json", "r") as f:
40
  face_metadata = json.load(f)
41
- return summary, transcription, text_metadata, image_metadata, face_metadata
 
42
 
43
  video_id = "IMFUOexuEXw"
 
44
  video_path = "avengers_interview.mp4"
45
- summary, transcription, text_metadata, image_metadata, face_metadata = load_data(video_id)
46
 
47
  # Load FAISS indexes
48
  @st.cache_resource
49
- def load_indexes(video_id):
50
- text_index = faiss.read_index(f"{video_id}_text_index.faiss")
51
- image_index = faiss.read_index(f"{video_id}_image_index.faiss")
52
- face_index = faiss.read_index(f"{video_id}_face_index.faiss")
53
- return text_index, image_index, face_index
54
 
55
- text_index, image_index, face_index = load_indexes(video_id)
56
 
57
- # Face clustering function
58
- def cluster_faces(face_embeddings, eps=0.5, min_samples=3):
 
 
59
  clustering = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine').fit(face_embeddings)
60
- return clustering.labels_
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- # Face clustering visualization
63
- def plot_face_clusters(face_embeddings, labels, face_metadata):
 
 
 
64
  pca = PCA(n_components=3)
65
  embeddings_3d = pca.fit_transform(face_embeddings)
66
 
@@ -70,12 +100,22 @@ def plot_face_clusters(face_embeddings, labels, face_metadata):
70
 
71
  traces = []
72
  for label, color in zip(unique_labels, colors):
 
 
73
  cluster_points = embeddings_3d[labels == label]
 
74
 
75
- hover_text = []
76
- for i, point in enumerate(cluster_points):
77
- face = face_metadata[np.where(labels == label)[0][i]]
78
- hover_text.append(f"Cluster {label}<br>Time: {face['start']:.2f}s")
 
 
 
 
 
 
 
79
 
80
  trace = go.Scatter3d(
81
  x=cluster_points[:, 0],
@@ -83,11 +123,7 @@ def plot_face_clusters(face_embeddings, labels, face_metadata):
83
  z=cluster_points[:, 2],
84
  mode='markers',
85
  name=f'Cluster {label}',
86
- marker=dict(
87
- size=5,
88
- color=color,
89
- opacity=0.8
90
- ),
91
  text=hover_text,
92
  hoverinfo='text'
93
  )
@@ -95,11 +131,7 @@ def plot_face_clusters(face_embeddings, labels, face_metadata):
95
 
96
  layout = go.Layout(
97
  title='Face Clusters Visualization',
98
- scene=dict(
99
- xaxis_title='PCA Component 1',
100
- yaxis_title='PCA Component 2',
101
- zaxis_title='PCA Component 3'
102
- ),
103
  margin=dict(r=0, b=0, l=0, t=40)
104
  )
105
 
@@ -124,9 +156,9 @@ def combined_search(query, text_index, image_index, text_metadata, image_metadat
124
  combined_results = sorted(text_results + image_results, key=lambda x: x['distance'])
125
  return combined_results[:n_results]
126
 
127
- def face_search(face_embedding, index, metadata, n_results=5):
128
- D, I = index.search(np.array(face_embedding).reshape(1, -1), n_results)
129
- results = [metadata[i] for i in I[0]]
130
  return results, D[0]
131
 
132
  def detect_and_embed_face(image, face_app):
@@ -156,43 +188,31 @@ st.sidebar.text_area("Full Transcript", transcript_text, height=300)
156
 
157
  # Main content
158
  st.header("Video Summary")
159
- col1, col2 = st.columns(2)
160
-
161
- with col1:
162
- st.subheader("Prominent Faces")
163
- for face in summary['prominent_faces']:
164
- st.write(f"Face ID: {face['id']}, Appearances: {face['appearances']}")
165
- if 'thumbnail' in face:
166
- image = Image.open(io.BytesIO(base64.b64decode(face['thumbnail'])))
167
- st.image(image, caption=f"Face ID: {face['id']}", width=100)
168
-
169
- with col2:
170
- st.subheader("Themes")
171
- for theme in summary['themes']:
172
- st.write(f"Theme ID: {theme['id']}, Keywords: {', '.join(theme['keywords'])}")
173
 
174
- # Face Clustering
175
- st.header("Face Clustering")
176
- face_embeddings = face_index.reconstruct_n(0, face_index.ntotal)
177
- face_labels = cluster_faces(face_embeddings)
 
 
 
 
 
 
 
 
 
178
 
179
- # Update face clusters in summary
180
- face_clusters = defaultdict(list)
181
- for i, label in enumerate(face_labels):
182
- face_clusters[label].append(face_metadata[i])
183
-
184
- summary['face_clusters'] = [
185
- {
186
- 'cluster_id': f'cluster_{label}',
187
- 'faces': cluster
188
- } for label, cluster in face_clusters.items()
189
- ]
190
-
191
- # Visualize face clusters
192
  st.subheader("Face Cluster Visualization")
193
- fig = plot_face_clusters(face_embeddings, face_labels, face_metadata)
194
  st.plotly_chart(fig)
195
 
 
 
 
 
 
196
  # Search functionality
197
  st.header("Search")
198
 
@@ -231,13 +251,13 @@ elif search_type == "Face":
231
  face_search_type = st.radio("Choose face search method", ["Select from clusters", "Upload image"])
232
 
233
  if face_search_type == "Select from clusters":
234
- cluster_id = st.selectbox("Select a face cluster", [f'cluster_{label}' for label in set(face_labels) if label != -1])
235
  if st.button("Search"):
236
- selected_cluster = next(cluster for cluster in summary['face_clusters'] if cluster['cluster_id'] == cluster_id)
237
  st.subheader("Face Cluster Search Results")
238
- for face in selected_cluster['faces']:
239
- st.write(f"Time: {face['start']:.2f}s - {face['end']:.2f}s")
240
- clip_path = create_video_clip(video_path, face['start'], face['end'], f"temp_face_clip_{face['start']}.mp4")
241
  st.video(clip_path)
242
  st.write("---")
243
  else:
 
10
  from insightface.app import FaceAnalysis
11
  from moviepy.editor import VideoFileClip
12
  from sklearn.cluster import DBSCAN
 
 
13
  from sklearn.decomposition import PCA
14
+ import plotly.graph_objs as go
15
 
16
  # Load models
17
  @st.cache_resource
 
26
 
27
  # Load data
28
@st.cache_data
def load_data(video_id, output_dir):
    """Load all precomputed analysis artifacts for one video.

    Reads the five JSON files produced by the analysis pipeline plus the
    FAISS face index; every artifact is named ``<video_id>_<suffix>`` and
    lives inside *output_dir*.

    Args:
        video_id: Identifier used as the artifact filename prefix.
        output_dir: Directory containing the pipeline's output files.

    Returns:
        Tuple of (summary, transcription, text_metadata, image_metadata,
        face_metadata, face_index).

    Raises:
        FileNotFoundError: If any expected artifact file is missing.
    """
    def _read_json(suffix):
        # All JSON artifacts share one naming scheme; load one by its suffix.
        with open(f"{output_dir}/{video_id}_{suffix}.json", "r") as f:
            return json.load(f)

    summary = _read_json("summary")
    transcription = _read_json("transcription")
    text_metadata = _read_json("text_metadata")
    image_metadata = _read_json("image_metadata")
    face_metadata = _read_json("face_metadata")
    # The face index is loaded here (not in load_indexes) because face
    # clustering needs it together with face_metadata right after loading.
    face_index = faiss.read_index(f"{output_dir}/{video_id}_face_index.faiss")
    return summary, transcription, text_metadata, image_metadata, face_metadata, face_index
42
 
43
  video_id = "IMFUOexuEXw"
44
+ output_dir = "video_analysis_output"
45
  video_path = "avengers_interview.mp4"
46
+ summary, transcription, text_metadata, image_metadata, face_metadata, face_index = load_data(video_id, output_dir)
47
 
48
  # Load FAISS indexes
49
@st.cache_resource
def load_indexes(video_id, output_dir):
    """Read the cached FAISS text and image indexes for *video_id*.

    Args:
        video_id: Identifier used as the index filename prefix.
        output_dir: Directory containing the ``.faiss`` index files.

    Returns:
        Tuple of (text_index, image_index) FAISS index objects.
    """
    return (
        faiss.read_index(f"{output_dir}/{video_id}_text_index.faiss"),
        faiss.read_index(f"{output_dir}/{video_id}_image_index.faiss"),
    )
 
54
 
55
+ text_index, image_index = load_indexes(video_id, output_dir)
56
 
57
# Comprehensive face summarization
def create_comprehensive_face_summary(face_index, face_metadata, eps=0.5, min_samples=3):
    """Cluster all face embeddings and summarize each cluster's screen presence.

    Args:
        face_index: FAISS index holding one embedding per detected face.
        face_metadata: Per-face dicts with at least ``'start'`` and ``'end'``
            timestamps, positionally aligned with the embeddings in
            *face_index*.
        eps: DBSCAN neighborhood radius (cosine distance).
        min_samples: Minimum neighborhood size for DBSCAN core points.

    Returns:
        Tuple of (summary, face_embeddings, labels): *summary* is a list of
        per-cluster dicts (DBSCAN noise label -1 is excluded), *face_embeddings*
        is the embedding matrix reconstructed from the index, and *labels* is
        the per-face cluster label array.
    """
    face_embeddings = face_index.reconstruct_n(0, face_index.ntotal)

    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine').fit(face_embeddings)

    # Group embedding positions by their cluster label.
    face_clusters = {}
    for i, label in enumerate(clustering.labels_):
        face_clusters.setdefault(label, []).append(i)

    summary = []
    for label, indices in face_clusters.items():
        if label == -1:
            continue  # Ignore noise points
        cluster_appearances = [face_metadata[i] for i in indices]
        summary.append({
            "cluster_id": f"cluster_{label}",
            "face_count": len(indices),
            "appearances": cluster_appearances,
            "timeline": [
                {"start": app['start'], "end": app['end']}
                for app in cluster_appearances
            ],
            "total_screen_time": sum(app['end'] - app['start'] for app in cluster_appearances),
            "first_appearance": min(app['start'] for app in cluster_appearances),
            "last_appearance": max(app['end'] for app in cluster_appearances),
        })

    return summary, face_embeddings, clustering.labels_
88
 
89
+ # Create comprehensive face summary
90
+ face_summary, face_embeddings, face_labels = create_comprehensive_face_summary(face_index, face_metadata)
91
+
92
+ # Face cluster visualization
93
+ def plot_face_clusters_interactive(face_embeddings, labels, face_summary):
94
  pca = PCA(n_components=3)
95
  embeddings_3d = pca.fit_transform(face_embeddings)
96
 
 
100
 
101
  traces = []
102
  for label, color in zip(unique_labels, colors):
103
+ if label == -1:
104
+ continue # Skip noise points
105
  cluster_points = embeddings_3d[labels == label]
106
+ cluster_info = next((c for c in face_summary if c['cluster_id'] == f'cluster_{label}'), None)
107
 
108
+ if cluster_info:
109
+ hover_text = [
110
+ f"Cluster {label}<br>"
111
+ f"Face count: {cluster_info['face_count']}<br>"
112
+ f"Total screen time: {cluster_info['total_screen_time']:.2f}s<br>"
113
+ f"First appearance: {cluster_info['first_appearance']:.2f}s<br>"
114
+ f"Last appearance: {cluster_info['last_appearance']:.2f}s"
115
+ for _ in cluster_points
116
+ ]
117
+ else:
118
+ hover_text = [f"Cluster {label}" for _ in cluster_points]
119
 
120
  trace = go.Scatter3d(
121
  x=cluster_points[:, 0],
 
123
  z=cluster_points[:, 2],
124
  mode='markers',
125
  name=f'Cluster {label}',
126
+ marker=dict(size=5, color=color, opacity=0.8),
 
 
 
 
127
  text=hover_text,
128
  hoverinfo='text'
129
  )
 
131
 
132
  layout = go.Layout(
133
  title='Face Clusters Visualization',
134
+ scene=dict(xaxis_title='PCA 1', yaxis_title='PCA 2', zaxis_title='PCA 3'),
 
 
 
 
135
  margin=dict(r=0, b=0, l=0, t=40)
136
  )
137
 
 
156
  combined_results = sorted(text_results + image_results, key=lambda x: x['distance'])
157
  return combined_results[:n_results]
158
 
159
def face_search(face_embedding, face_index, face_metadata, n_results=5):
    """Find the faces most similar to *face_embedding* in the FAISS index.

    Args:
        face_embedding: A single face embedding vector.
        face_index: FAISS index to query.
        face_metadata: Per-face metadata, positionally aligned with the index.
        n_results: Number of nearest neighbors to return.

    Returns:
        Tuple (results, distances): metadata dicts of the nearest faces and
        their corresponding distances.
    """
    query = np.array([face_embedding])
    distances, neighbor_ids = face_index.search(query, n_results)
    matches = [face_metadata[idx] for idx in neighbor_ids[0]]
    return matches, distances[0]
163
 
164
  def detect_and_embed_face(image, face_app):
 
188
 
189
  # Main content
190
  st.header("Video Summary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
+ # Face Clusters
193
+ st.subheader("Face Clusters")
194
+ for cluster in face_summary[:5]: # Display first 5 clusters
195
+ st.write(f"Cluster {cluster['cluster_id']}:")
196
+ st.write(f" Face count: {cluster['face_count']}")
197
+ st.write(f" Total screen time: {cluster['total_screen_time']:.2f} seconds")
198
+ st.write(f" First appearance: {cluster['first_appearance']:.2f} seconds")
199
+ st.write(f" Last appearance: {cluster['last_appearance']:.2f} seconds")
200
+ st.write(f" Timeline: {len(cluster['timeline'])} appearances")
201
+ st.write(" First 5 appearances:")
202
+ for app in cluster['timeline'][:5]:
203
+ st.write(f" {app['start']:.2f}s - {app['end']:.2f}s")
204
+ st.write("---")
205
 
206
+ # Face Cluster Visualization
 
 
 
 
 
 
 
 
 
 
 
 
207
  st.subheader("Face Cluster Visualization")
208
+ fig = plot_face_clusters_interactive(face_embeddings, face_labels, face_summary)
209
  st.plotly_chart(fig)
210
 
211
+ # Themes
212
+ st.subheader("Themes")
213
+ for theme in summary['themes']:
214
+ st.write(f"Theme ID: {theme['id']}, Keywords: {', '.join(theme['keywords'])}")
215
+
216
  # Search functionality
217
  st.header("Search")
218
 
 
251
  face_search_type = st.radio("Choose face search method", ["Select from clusters", "Upload image"])
252
 
253
  if face_search_type == "Select from clusters":
254
+ cluster_id = st.selectbox("Select a face cluster", [cluster['cluster_id'] for cluster in face_summary])
255
  if st.button("Search"):
256
+ selected_cluster = next(cluster for cluster in face_summary if cluster['cluster_id'] == cluster_id)
257
  st.subheader("Face Cluster Search Results")
258
+ for appearance in selected_cluster['appearances'][:5]: # Show first 5 appearances
259
+ st.write(f"Time: {appearance['start']:.2f}s - {appearance['end']:.2f}s")
260
+ clip_path = create_video_clip(video_path, appearance['start'], appearance['end'], f"temp_face_clip_{appearance['start']}.mp4")
261
  st.video(clip_path)
262
  st.write("---")
263
  else: