dodijk iskaj prajaktashouche committed on
Commit
b9cd4c4
1 Parent(s): a4158a1

Move plotting code to plot.py

Browse files

Co-authored-by: iskaj <[email protected]>
Co-authored-by: prajaktashouche <[email protected]>

Files changed (3) hide show
  1. app.py +23 -118
  2. plot.py +58 -0
  3. videomatch.py +60 -1
app.py CHANGED
@@ -1,131 +1,16 @@
1
  import logging
2
- import time
3
 
4
- import pandas
5
  import gradio as gr
6
 
7
- import seaborn as sns
8
- import matplotlib.pyplot as plt
9
-
10
- import numpy as np
11
- import pandas as pd
12
-
13
  from config import *
14
  from videomatch import index_hashes_for_video, get_decent_distance, \
15
- get_video_indices, compare_videos, get_change_points
16
-
17
 
18
  logging.basicConfig()
19
  logging.getLogger().setLevel(logging.INFO)
20
 
21
 
22
- def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
23
- sns.set_theme()
24
-
25
- x = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
26
- x = [i/FPS for j in x for i in j]
27
- y = [i/FPS for i in I]
28
-
29
- # Create figure and dataframe to plot with sns
30
- fig = plt.figure()
31
- # plt.tight_layout()
32
- df = pd.DataFrame(zip(x, y), columns = ['X', 'Y'])
33
- g = sns.scatterplot(data=df, x='X', y='Y', s=2*(1-D/(MIN_DISTANCE+1)), alpha=1-D/MIN_DISTANCE)
34
-
35
- # Set x-labels to be more readable
36
- x_locs, x_labels = plt.xticks() # Get original locations and labels for x ticks
37
- x_labels = [time.strftime('%H:%M:%S', time.gmtime(x)) for x in x_locs]
38
- plt.xticks(x_locs, x_labels)
39
- plt.xticks(rotation=90)
40
- plt.xlabel('Time in source video (H:M:S)')
41
- plt.xlim(0, None)
42
-
43
- # Set y-labels to be more readable
44
- y_locs, y_labels = plt.yticks() # Get original locations and labels for x ticks
45
- y_labels = [time.strftime('%H:%M:%S', time.gmtime(y)) for y in y_locs]
46
- plt.yticks(y_locs, y_labels)
47
- plt.ylabel('Time in target video (H:M:S)')
48
-
49
- # Adjust padding to fit gradio
50
- plt.subplots_adjust(bottom=0.25, left=0.20)
51
- return fig
52
-
53
- def plot_multi_comparison(df, change_points):
54
- """ From the dataframe plot the current set of plots, where the bottom right is most indicative """
55
- fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True)
56
- sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0,0])
57
- sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
58
- sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[1,0])
59
- sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1,1])
60
-
61
- # Plot change point as lines
62
- sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[2,1])
63
- for x in change_points:
64
- cp_time = x.start_time
65
- plt.vlines(x=cp_time, ymin=np.min(df['OFFSET_LIP']), ymax=np.max(df['OFFSET_LIP']), colors='red', lw=2)
66
- rand_y_pos = np.random.uniform(low=np.min(df['OFFSET_LIP']), high=np.max(df['OFFSET_LIP']), size=None)
67
- plt.text(x=cp_time, y=rand_y_pos, s=str(np.round(x.confidence, 2)), color='r', rotation=-0.0, fontsize=14)
68
- plt.xticks(rotation=90)
69
- return fig
70
-
71
-
72
- def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
73
- distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
74
- video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
75
- lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = distance)
76
-
77
- target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
78
- target_s = [i/FPS for j in target for i in j]
79
- source_s = [i/FPS for i in I]
80
-
81
- # Make df
82
- df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
83
- if vanilla_df:
84
- return df
85
-
86
- # Minimum distance dataframe ----
87
- # Group by X so for every second/x there will be 1 value of Y in the end
88
- # index_min_distance = df.groupby('TARGET_S')['DISTANCE'].idxmin()
89
- # df_min = df.loc[index_min_distance]
90
- # df_min
91
- # -------------------------------
92
-
93
- df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
94
- df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y
95
-
96
- # Group by X so for every second/x there will be 1 value of Y in the end
97
- grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
98
- grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']
99
-
100
- # Remake the dataframe
101
- df = grouped_X.reset_index()
102
- df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
103
- df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')
104
-
105
- # Add NAN to "missing" x values (base it off hash vector, not target_s)
106
- step_size = 1/FPS
107
- x_complete = np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1) # More robust
108
- df['TARGET_S'] = np.round(df['TARGET_S'], 1)
109
- df_complete = pd.DataFrame(x_complete, columns=['TARGET_S'])
110
-
111
- # Merge dataframes to get NAN values for every missing SOURCE_S
112
- df = df_complete.merge(df, on='TARGET_S', how='left')
113
-
114
- # Interpolate between frames since there are missing values
115
- df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)
116
-
117
- # Add timeshift col and timeshift col with Linearly Interpolated Values
118
- df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
119
- df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']
120
-
121
- # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
122
- df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
123
- df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
124
-
125
- # Add time column for plotting
126
- df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
127
- return df
128
-
129
  def get_comparison(url, target, MIN_DISTANCE = 4):
130
  """ Function for Gradio to combine all helper functions"""
131
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
@@ -147,6 +32,26 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
147
  fig = plot_multi_comparison(df, change_points)
148
  return fig
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
 
152
  video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
@@ -166,7 +71,7 @@ compare_iface = gr.Interface(fn=get_comparison,
166
  examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
167
 
168
  auto_compare_iface = gr.Interface(fn=get_auto_comparison,
169
- inputs=["text", "text", gr.Slider(1, 50, 10, step=1), gr.Dropdown(choices=["CUSUM", "Robust"], value="CUSUM")],
170
  outputs="plot",
171
  examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
172
 
 
1
  import logging
 
2
 
 
3
  import gradio as gr
4
 
 
 
 
 
 
 
5
  from config import *
6
  from videomatch import index_hashes_for_video, get_decent_distance, \
7
+ get_video_indices, compare_videos, get_change_points, get_videomatch_df
8
+ from plot import plot_comparison, plot_multi_comparison
9
 
10
  logging.basicConfig()
11
  logging.getLogger().setLevel(logging.INFO)
12
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def get_comparison(url, target, MIN_DISTANCE = 4):
15
  """ Function for Gradio to combine all helper functions"""
16
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
 
32
  fig = plot_multi_comparison(df, change_points)
33
  return fig
34
 
35
+ def get_auto_edit_decision(url, target, smoothing_window_size=10):
36
+ """ Function for Gradio to combine all helper functions"""
37
+ distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
38
+ if distance == None:
39
+ return None
40
+ raise gr.Error("No matches found!")
41
+ video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
42
+ lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = distance)
43
+
44
+ df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
45
+ change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST")
46
+ edit_decision_list = []
47
+ for cp in change_points:
48
+ decision = f"Video at time {cp.start_time} returns {cp.metric}"
49
+ # edit_decision_list.append(f"Video at time {cp.start_time} returns {cp.metric}")
50
+
51
+
52
+ fig = plot_multi_comparison(df, change_points)
53
+ return fig
54
+
55
 
56
 
57
  video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
 
71
  examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
72
 
73
  auto_compare_iface = gr.Interface(fn=get_auto_comparison,
74
+ inputs=["text", "text", gr.Slider(1, 50, 10, step=1), gr.Dropdown(choices=["CUSUM", "Robust"], value="Robust")],
75
  outputs="plot",
76
  examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
77
 
plot.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ import seaborn as sns
6
+ import matplotlib.pyplot as plt
7
+
8
+ from config import FPS
9
+
10
+
11
+ def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
12
+ sns.set_theme()
13
+
14
+ x = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
15
+ x = [i/FPS for j in x for i in j]
16
+ y = [i/FPS for i in I]
17
+
18
+ # Create figure and dataframe to plot with sns
19
+ fig = plt.figure()
20
+ # plt.tight_layout()
21
+ df = pd.DataFrame(zip(x, y), columns = ['X', 'Y'])
22
+ g = sns.scatterplot(data=df, x='X', y='Y', s=2*(1-D/(MIN_DISTANCE+1)), alpha=1-D/MIN_DISTANCE)
23
+
24
+ # Set x-labels to be more readable
25
+ x_locs, x_labels = plt.xticks() # Get original locations and labels for x ticks
26
+ x_labels = [time.strftime('%H:%M:%S', time.gmtime(x)) for x in x_locs]
27
+ plt.xticks(x_locs, x_labels)
28
+ plt.xticks(rotation=90)
29
+ plt.xlabel('Time in source video (H:M:S)')
30
+ plt.xlim(0, None)
31
+
32
+ # Set y-labels to be more readable
33
+ y_locs, y_labels = plt.yticks() # Get original locations and labels for x ticks
34
+ y_labels = [time.strftime('%H:%M:%S', time.gmtime(y)) for y in y_locs]
35
+ plt.yticks(y_locs, y_labels)
36
+ plt.ylabel('Time in target video (H:M:S)')
37
+
38
+ # Adjust padding to fit gradio
39
+ plt.subplots_adjust(bottom=0.25, left=0.20)
40
+ return fig
41
+
42
+ def plot_multi_comparison(df, change_points):
43
+ """ From the dataframe plot the current set of plots, where the bottom right is most indicative """
44
+ fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True)
45
+ sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0,0])
46
+ sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
47
+ sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[1,0])
48
+ sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1,1])
49
+
50
+ # Plot change point as lines
51
+ sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[2,1])
52
+ for x in change_points:
53
+ cp_time = x.start_time
54
+ plt.vlines(x=cp_time, ymin=np.min(df['OFFSET_LIP']), ymax=np.max(df['OFFSET_LIP']), colors='red', lw=2)
55
+ rand_y_pos = np.random.uniform(low=np.min(df['OFFSET_LIP']), high=np.max(df['OFFSET_LIP']), size=None)
56
+ plt.text(x=cp_time, y=rand_y_pos, s=str(np.round(x.confidence, 2)), color='r', rotation=-0.0, fontsize=14)
57
+ plt.xticks(rotation=90)
58
+ return fig
videomatch.py CHANGED
@@ -7,9 +7,11 @@ from kats.detectors.cusum_detection import CUSUMDetector
7
  from kats.detectors.robust_stat_detection import RobustStatDetector
8
  from kats.consts import TimeSeriesData
9
 
10
- import numpy as np
 
11
 
12
  from videohash import compute_hashes, filepath_from_url
 
13
 
14
  def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
15
  """ Compute hashes of a video and index the video using faiss indices and return the index. """
@@ -98,3 +100,60 @@ def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
98
  jump_s = mean_offset_postchange - mean_offset_prechange
99
  print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
100
  return change_points
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from kats.detectors.robust_stat_detection import RobustStatDetector
8
  from kats.consts import TimeSeriesData
9
 
10
+ import numpy as np
11
+ import pandas as pd
12
 
13
  from videohash import compute_hashes, filepath_from_url
14
+ from config import FPS, MIN_DISTANCE, MAX_DISTANCE
15
 
16
  def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
17
  """ Compute hashes of a video and index the video using faiss indices and return the index. """
 
100
  jump_s = mean_offset_postchange - mean_offset_prechange
101
  print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
102
  return change_points
103
+
104
+ def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
105
+ distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
106
+ video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
107
+ lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = distance)
108
+
109
+ target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
110
+ target_s = [i/FPS for j in target for i in j]
111
+ source_s = [i/FPS for i in I]
112
+
113
+ # Make df
114
+ df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
115
+ if vanilla_df:
116
+ return df
117
+
118
+ # Minimum distance dataframe ----
119
+ # Group by X so for every second/x there will be 1 value of Y in the end
120
+ # index_min_distance = df.groupby('TARGET_S')['DISTANCE'].idxmin()
121
+ # df_min = df.loc[index_min_distance]
122
+ # df_min
123
+ # -------------------------------
124
+
125
+ df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
126
+ df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y
127
+
128
+ # Group by X so for every second/x there will be 1 value of Y in the end
129
+ grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
130
+ grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']
131
+
132
+ # Remake the dataframe
133
+ df = grouped_X.reset_index()
134
+ df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
135
+ df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')
136
+
137
+ # Add NAN to "missing" x values (base it off hash vector, not target_s)
138
+ step_size = 1/FPS
139
+ x_complete = np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1) # More robust
140
+ df['TARGET_S'] = np.round(df['TARGET_S'], 1)
141
+ df_complete = pd.DataFrame(x_complete, columns=['TARGET_S'])
142
+
143
+ # Merge dataframes to get NAN values for every missing SOURCE_S
144
+ df = df_complete.merge(df, on='TARGET_S', how='left')
145
+
146
+ # Interpolate between frames since there are missing values
147
+ df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)
148
+
149
+ # Add timeshift col and timeshift col with Linearly Interpolated Values
150
+ df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
151
+ df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']
152
+
153
+ # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
154
+ df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
155
+ df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
156
+
157
+ # Add time column for plotting
158
+ df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
159
+ return df