Commit 879e657 • Parent(s): 9061a2e
Iskaj committed

minor changes to code style and comments
Files changed:
- app.py +12 -29
- plot.py +19 -26
- videohash.py +1 -0
- videomatch.py +2 -3
app.py
CHANGED
@@ -19,7 +19,7 @@ def get_comparison(url, target, MIN_DISTANCE = 4):
     fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
     return fig
 
-def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
+def get_auto_comparison(url, target, smoothing_window_size=10, metric="OFFSET_LIP"):
     """ Function for Gradio to combine all helper functions"""
     distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
     if distance == None:
@@ -27,34 +27,14 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
         raise gr.Error("No matches found!")
     video_index, hash_vectors = get_video_index(url)
     target_index, _ = get_video_index(target)
-    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
-    # fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
-    df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
-    change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
-    fig, segment_decisions = plot_segment_comparison(df, change_points)
-    return fig, segment_decisions
-
-def get_auto_edit_decision(url, target, smoothing_window_size=10):
-    """ Function for Gradio to combine all helper functions"""
-    distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
-    if distance == None:
-        return None
-        raise gr.Error("No matches found!")
-    video_index, hash_vectors = get_video_index(url)
-    target_index, _ = get_video_index(target)
-    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
-
-    df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
-    change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST")
-    edit_decision_list = []
-    for cp in change_points:
-        decision = f"Video at time {cp.start_time} returns {cp.metric}"
-        # edit_decision_list.append(f"Video at time {cp.start_time} returns {cp.metric}")
-
-
-    fig = plot_multi_comparison(df, change_points)
-    return fig
 
+    # For each video do...
+    for i in range(0, 1):
+        lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
+        df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
+        change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST", metric=metric)
+        fig, segment_decisions = plot_segment_comparison(df, change_points, target)
+        return fig, segment_decisions
 
 
 video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
@@ -74,7 +54,10 @@ compare_iface = gr.Interface(fn=get_comparison,
                              examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
 
 auto_compare_iface = gr.Interface(fn=get_auto_comparison,
-                                  inputs=["text",
+                                  inputs=["text",
+                                          "text",
+                                          gr.Slider(2, 50, 10, step=1),
+                                          gr.Dropdown(choices=["OFFSET_LIP", "ROLL_OFFSET_MODE"], value="OFFSET_LIP")],
                                   outputs=["plot", "json"],
                                   examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
 
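The updated get_auto_comparison now takes a metric choice and is exposed through four Gradio inputs (two URLs, a smoothing-window slider, and a metric dropdown). Below is a minimal sketch of how the two interfaces defined in app.py could be launched together; the TabbedInterface wiring and the tab titles are assumptions, not part of this commit.

import gradio as gr

# Hypothetical launch block: app.py's actual launch code is not shown in this diff.
demo = gr.TabbedInterface(
    [compare_iface, auto_compare_iface],       # interfaces defined above in app.py
    ["Manual comparison", "Auto comparison"],  # assumed tab titles
)

if __name__ == "__main__":
    demo.launch()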
plot.py
CHANGED
@@ -69,29 +69,25 @@ def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
         return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
     return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')
 
-def plot_segment_comparison(df, change_points):
+def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID"):
     """ From the dataframe plot the current set of plots, where the bottom right is most indicative """
     fig, ax_arr = plt.subplots(3, 1, figsize=(16, 6), dpi=100, sharex=True)
-
+
+    # Plot original datapoints without linear interpolation, offset by target video time
     sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[0], label="OFFSET", alpha=0.5)
-    # sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
-
-    # Get rolling average offset
-    # window_size = 30
-    # df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).median()
-    # df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).apply(lambda x: st.mode(x)[0])
-    metric = 'ROLL_OFFSET_MODE' #'OFFSET'
-    sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[1], label=metric, alpha=0.5)
 
     # Plot linearly interpolated values
-    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1], label="OFFSET_LIP")
+    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1], label="OFFSET_LIP", color='orange')
 
-    # Plot
-
-
+    # Plot our target metric
+    metric = 'ROLL_OFFSET_MODE' # 'OFFSET'
+    sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[1], label=metric, alpha=0.5)
+
+    # Plot detected change points as lines which will indicate the segments
     sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[2], label=metric, s=20)
     timestamps = change_points_to_segments(df, change_points)
 
+    # To store "decisions" about segments
     segment_decisions = {}
     seg_i = 0
 
@@ -99,20 +95,16 @@ def plot_segment_comparison(df, change_points):
     for x in timestamps:
         plt.vlines(x=x, ymin=np.min(df[metric]), ymax=np.max(df[metric]), colors='black', lw=2, alpha=0.5)
 
-
-    threshold_diff = 1.5 # Average diff threshold
-    # threshold = 3.0 # s diff threshold
+    threshold_diff = 1.5 # Average segment difference threshold for plotting
     for start_time, end_time in zip(timestamps[:-1], timestamps[1:]):
 
-        #
+        # Time to add to each origin time to get the correct time back since it is offset by add_offset
         add_offset = np.min(df['SOURCE_S'])
 
         # Cut out the segment between the segment lines
         segment = df[(df['time'] > start_time) & (df['time'] < end_time)] # Not offset LIP
         segment_no_nan = segment[~np.isnan(segment[metric])] # Remove NaNs
         segment_offsets = segment_no_nan[metric] # np.round(segment_no_nan['OFFSET'], 1)
-        # segment_offsets = np.round(segment_no_nan['OFFSET'], 0)
-        # print(segment_offsets)
 
         # Calculate mean/median/mode
         # seg_sum_stat = np.mean(segment_offsets)
@@ -120,7 +112,8 @@ def plot_segment_comparison(df, change_points):
         seg_sum_stat = st.mode(segment_offsets)[0][0]
 
         # Get average difference from mean/median/mode of the segment to see if it is a "straight line" or not
-        average_diff = np.median(np.abs(
+        average_diff = np.median(np.abs(segment_no_nan['OFFSET_LIP'] - seg_sum_stat))
+        average_offset = np.mean(segment_no_nan['OFFSET_LIP'])
 
         # If the time where the segment comes from (origin time) is close to the start_time, it's a "good match", so no editing
         noisy = False if average_diff < threshold_diff else True
@@ -142,17 +135,17 @@ def plot_segment_comparison(df, change_points):
         end_time_str = pd.to_datetime(end_time).strftime('%H:%M:%S')
         origin_start_time_str = pd.to_datetime(origin_start_time).strftime('%H:%M:%S')
         origin_end_time_str = pd.to_datetime(origin_end_time).strftime('%H:%M:%S')
-        video_id = "placeholder_video_id"
-
         decision = {"Target Start Time" : start_time_str,
                     "Target End Time" : end_time_str,
                     "Source Start Time" : origin_start_time_str,
                     "Source End Time" : origin_end_time_str,
-                    "Video ID" : video_id,
-                    "
+                    "Source Video ID" : video_id,
+                    "Uncertainty" : np.round(average_diff, 3),
+                    "Average Offset in Seconds" : np.round(average_offset, 3),
+                    "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID '{video_id}' from {origin_start_time_str} -> {origin_end_time_str}"}
         segment_decisions[f'Segment {seg_i}'] = decision
         seg_i += 1
-        print(decision)
+        # print(decision)
 
     # Return figure
     plt.xticks(rotation=90)
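To make the new "Uncertainty" and "Average Offset in Seconds" fields concrete, here is a small self-contained sketch of the per-segment statistics computed above. It uses toy offset values instead of a real dataframe segment; the variable names mirror plot.py but the numbers are invented.

import numpy as np
import scipy.stats as st

segment_offsets = np.array([2.0, 2.0, 2.1, 1.9, 2.0, 6.5])  # toy OFFSET_LIP-style values
threshold_diff = 1.5  # same threshold as in plot.py

# Modal offset of the segment; the [0][0] indexing assumes the older scipy
# mode() that returns arrays, as plot.py does.
seg_sum_stat = st.mode(segment_offsets)[0][0]
average_diff = np.median(np.abs(segment_offsets - seg_sum_stat))  # "Uncertainty"
average_offset = np.mean(segment_offsets)                         # "Average Offset in Seconds"

# A segment whose offsets hug the mode counts as a clean match; a large spread marks it as noisy.
noisy = False if average_diff < threshold_diff else True
print(np.round(average_diff, 3), np.round(average_offset, 3), noisy)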
videohash.py
CHANGED
@@ -18,6 +18,7 @@ def filepath_from_url(url):
 
 def download_video_from_url(url):
     """Download video from url or return md5 hash as video name"""
+    # TODO: Make work for Google link
     filepath = filepath_from_url(url)
     if not os.path.exists(filepath):
         with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
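filepath_from_url itself is not part of this hunk; the docstring above suggests the local filename is derived from an md5 hash of the URL. A hedged sketch of what such a helper might look like follows (the function name, hash choice, and temp-dir location are assumptions, not taken from this commit).

import hashlib
import os
import tempfile

def filepath_from_url_sketch(url: str) -> str:
    """Hypothetical stand-in: derive a stable local filepath from a video URL."""
    name = hashlib.md5(url.encode()).hexdigest()  # md5 of the URL as the video name
    return os.path.join(tempfile.gettempdir(), f"{name}.mp4")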
videomatch.py
CHANGED
@@ -79,9 +79,8 @@ def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
         logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
         return None
 
-def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
-    tsd = TimeSeriesData(df.loc[:,['time',
-    # tsd = TimeSeriesData(df.loc[:,['time','ROLL_OFFSET_MODE']])
+def get_change_points(df, smoothing_window_size=10, method='CUSUM', metric="OFFSET_LIP"):
+    tsd = TimeSeriesData(df.loc[:,['time', metric]])
     if method.upper() == "CUSUM":
         detector = CUSUMDetector(tsd)
     elif method.upper() == "ROBUST":
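The hunk stops at the ROBUST branch, so the detector construction itself is not visible here. Below is a hedged sketch of how the rest of get_change_points could plausibly continue with Kats; the RobustStatDetector import and the detector() arguments are assumptions, not taken from this commit.

from kats.consts import TimeSeriesData
from kats.detectors.cusum_detection import CUSUMDetector
from kats.detectors.robust_stat_detection import RobustStatDetector

def get_change_points_sketch(df, smoothing_window_size=10, method='CUSUM', metric="OFFSET_LIP"):
    """Hypothetical continuation of get_change_points, mirroring the signature above."""
    tsd = TimeSeriesData(df.loc[:, ['time', metric]])
    if method.upper() == "CUSUM":
        # CUSUM-based detection with Kats defaults
        return CUSUMDetector(tsd).detector()
    elif method.upper() == "ROBUST":
        # RobustStatDetector smooths the series over a rolling window before testing for level shifts
        return RobustStatDetector(tsd).detector(smoothing_window_size=smoothing_window_size)
    raise ValueError(f"Unknown change point detection method: {method}")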