Spaces:
Build error
Build error
Iskaj
committed on
Commit
•
9061a2e
1
Parent(s):
c9759ff
change detection to use rolling mode, add json output
Browse files
app.py
CHANGED
@@ -31,8 +31,8 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
|
|
31 |
# fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
|
32 |
df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
|
33 |
change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
|
34 |
-
fig = plot_segment_comparison(df, change_points)
|
35 |
-
return fig
|
36 |
|
37 |
def get_auto_edit_decision(url, target, smoothing_window_size=10):
|
38 |
""" Function for Gradio to combine all helper functions"""
|
@@ -74,8 +74,8 @@ compare_iface = gr.Interface(fn=get_comparison,
|
|
74 |
examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
|
75 |
|
76 |
auto_compare_iface = gr.Interface(fn=get_auto_comparison,
|
77 |
-
inputs=["text", "text", gr.Slider(
|
78 |
-
outputs="plot",
|
79 |
examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
|
80 |
|
81 |
iface = gr.TabbedInterface([auto_compare_iface, compare_iface, index_iface,], ["AutoCompare", "Compare", "Index"])
|
|
|
31 |
# fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
|
32 |
df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
|
33 |
change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
|
34 |
+
fig, segment_decisions = plot_segment_comparison(df, change_points)
|
35 |
+
return fig, segment_decisions
|
36 |
|
37 |
def get_auto_edit_decision(url, target, smoothing_window_size=10):
|
38 |
""" Function for Gradio to combine all helper functions"""
|
|
|
74 |
examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
|
75 |
|
76 |
auto_compare_iface = gr.Interface(fn=get_auto_comparison,
|
77 |
+
inputs=["text", "text", gr.Slider(2, 50, 10, step=1), gr.Dropdown(choices=["CUSUM", "Robust"], value="Robust")],
|
78 |
+
outputs=["plot", "json"],
|
79 |
examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
|
80 |
|
81 |
iface = gr.TabbedInterface([auto_compare_iface, compare_iface, index_iface,], ["AutoCompare", "Compare", "Index"])
|
config.py
CHANGED
@@ -5,3 +5,4 @@ VIDEO_DIRECTORY = tempfile.gettempdir()
|
|
5 |
FPS = 5
|
6 |
MIN_DISTANCE = 4
|
7 |
MAX_DISTANCE = 30
|
|
|
|
5 |
FPS = 5
|
6 |
MIN_DISTANCE = 4
|
7 |
MAX_DISTANCE = 30
|
8 |
+
ROLLING_WINDOW_SIZE = 10
|
plot.py
CHANGED
@@ -71,32 +71,48 @@ def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
|
|
71 |
|
72 |
def plot_segment_comparison(df, change_points):
|
73 |
""" From the dataframe plot the current set of plots, where the bottom right is most indicative """
|
74 |
-
fig, ax_arr = plt.subplots(
|
75 |
-
sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0])
|
|
|
76 |
# sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
|
77 |
|
78 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
# sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1,0])
|
80 |
-
sns.
|
81 |
timestamps = change_points_to_segments(df, change_points)
|
82 |
|
|
|
|
|
|
|
83 |
# To plot the detected segment lines
|
84 |
for x in timestamps:
|
85 |
-
plt.vlines(x=x, ymin=np.min(df[
|
86 |
-
rand_y_pos = np.random.uniform(low=np.min(df['OFFSET_LIP']), high=np.max(df['OFFSET_LIP']), size=None)
|
87 |
|
88 |
# To get each detected segment and their mean?
|
89 |
threshold_diff = 1.5 # Average diff threshold
|
90 |
# threshold = 3.0 # s diff threshold
|
91 |
for start_time, end_time in zip(timestamps[:-1], timestamps[1:]):
|
92 |
|
|
|
93 |
add_offset = np.min(df['SOURCE_S'])
|
94 |
|
95 |
-
# Cut out the segment between the segment lines
|
96 |
segment = df[(df['time'] > start_time) & (df['time'] < end_time)] # Not offset LIP
|
97 |
-
segment_no_nan = segment[~np.isnan(segment[
|
98 |
-
segment_offsets = segment_no_nan[
|
99 |
# segment_offsets = np.round(segment_no_nan['OFFSET'], 0)
|
|
|
100 |
|
101 |
# Calculate mean/median/mode
|
102 |
# seg_sum_stat = np.mean(segment_offsets)
|
@@ -104,27 +120,40 @@ def plot_segment_comparison(df, change_points):
|
|
104 |
seg_sum_stat = st.mode(segment_offsets)[0][0]
|
105 |
|
106 |
# Get average difference from mean/median/mode of the segment to see if it is a "straight line" or not
|
107 |
-
average_diff = np.
|
108 |
|
109 |
# If the time where the segment comes from (origin time) is close to the start_time, it's a "good match", so no editing
|
110 |
noisy = False if average_diff < threshold_diff else True
|
111 |
-
|
|
|
112 |
|
113 |
# Plot green for a confident prediction (straight line), red otherwise
|
114 |
if not noisy:
|
115 |
# Plot estimated straight line
|
116 |
-
plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color='green', lw=
|
117 |
plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='green', rotation=-0.0, fontsize=14)
|
118 |
else:
|
119 |
# Plot estimated straight line
|
120 |
-
plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color='red', lw=
|
121 |
plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
|
122 |
-
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
# Return figure
|
129 |
plt.xticks(rotation=90)
|
130 |
-
return fig
|
|
|
71 |
|
72 |
def plot_segment_comparison(df, change_points):
|
73 |
""" From the dataframe plot the current set of plots, where the bottom right is most indicative """
|
74 |
+
fig, ax_arr = plt.subplots(3, 1, figsize=(16, 6), dpi=100, sharex=True)
|
75 |
+
# sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0])
|
76 |
+
sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[0], label="OFFSET", alpha=0.5)
|
77 |
# sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
|
78 |
|
79 |
+
# Get rolling average offset
|
80 |
+
# window_size = 30
|
81 |
+
# df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).median()
|
82 |
+
# df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).apply(lambda x: st.mode(x)[0])
|
83 |
+
metric = 'ROLL_OFFSET_MODE' #'OFFSET'
|
84 |
+
sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[1], label=metric, alpha=0.5)
|
85 |
+
|
86 |
+
# Plot linearly interpolated values
|
87 |
+
sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1], label="OFFSET_LIP")
|
88 |
+
|
89 |
+
# Plot change point as lines
|
90 |
+
|
91 |
# sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1,0])
|
92 |
+
sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[2], label=metric, s=20)
|
93 |
timestamps = change_points_to_segments(df, change_points)
|
94 |
|
95 |
+
segment_decisions = {}
|
96 |
+
seg_i = 0
|
97 |
+
|
98 |
# To plot the detected segment lines
|
99 |
for x in timestamps:
|
100 |
+
plt.vlines(x=x, ymin=np.min(df[metric]), ymax=np.max(df[metric]), colors='black', lw=2, alpha=0.5)
|
|
|
101 |
|
102 |
# To get each detected segment and their mean?
|
103 |
threshold_diff = 1.5 # Average diff threshold
|
104 |
# threshold = 3.0 # s diff threshold
|
105 |
for start_time, end_time in zip(timestamps[:-1], timestamps[1:]):
|
106 |
|
107 |
+
# add_offset = df.iloc[0]['SOURCE_S'] # np.min(df['SOURCE_S'])
|
108 |
add_offset = np.min(df['SOURCE_S'])
|
109 |
|
110 |
+
# Cut out the segment between the segment lines
|
111 |
segment = df[(df['time'] > start_time) & (df['time'] < end_time)] # Not offset LIP
|
112 |
+
segment_no_nan = segment[~np.isnan(segment[metric])] # Remove NaNs
|
113 |
+
segment_offsets = segment_no_nan[metric] # np.round(segment_no_nan['OFFSET'], 1)
|
114 |
# segment_offsets = np.round(segment_no_nan['OFFSET'], 0)
|
115 |
+
# print(segment_offsets)
|
116 |
|
117 |
# Calculate mean/median/mode
|
118 |
# seg_sum_stat = np.mean(segment_offsets)
|
|
|
120 |
seg_sum_stat = st.mode(segment_offsets)[0][0]
|
121 |
|
122 |
# Get average difference from mean/median/mode of the segment to see if it is a "straight line" or not
|
123 |
+
average_diff = np.median(np.abs(segment_offsets - seg_sum_stat))
|
124 |
|
125 |
# If the time where the segment comes from (origin time) is close to the start_time, it's a "good match", so no editing
|
126 |
noisy = False if average_diff < threshold_diff else True
|
127 |
+
origin_start_time = add_seconds_to_datetime64(start_time, seg_sum_stat + add_offset)
|
128 |
+
origin_end_time = add_seconds_to_datetime64(end_time, seg_sum_stat + add_offset)
|
129 |
|
130 |
# Plot green for a confident prediction (straight line), red otherwise
|
131 |
if not noisy:
|
132 |
# Plot estimated straight line
|
133 |
+
plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color='green', lw=5, alpha=0.5)
|
134 |
plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='green', rotation=-0.0, fontsize=14)
|
135 |
else:
|
136 |
# Plot estimated straight line
|
137 |
+
plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color='red', lw=5, alpha=0.5)
|
138 |
plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
|
|
|
139 |
|
140 |
+
# Decisions about segments
|
141 |
+
start_time_str = pd.to_datetime(start_time).strftime('%H:%M:%S')
|
142 |
+
end_time_str = pd.to_datetime(end_time).strftime('%H:%M:%S')
|
143 |
+
origin_start_time_str = pd.to_datetime(origin_start_time).strftime('%H:%M:%S')
|
144 |
+
origin_end_time_str = pd.to_datetime(origin_end_time).strftime('%H:%M:%S')
|
145 |
+
video_id = "placeholder_video_id"
|
146 |
+
|
147 |
+
decision = {"Target Start Time" : start_time_str,
|
148 |
+
"Target End Time" : end_time_str,
|
149 |
+
"Source Start Time" : origin_start_time_str,
|
150 |
+
"Source End Time" : origin_end_time_str,
|
151 |
+
"Video ID" : video_id,
|
152 |
+
"Explanation" : f"{start_time_str} -> {end_time_str} comes from video {video_id} from {origin_start_time_str} -> {origin_end_time_str}"}
|
153 |
+
segment_decisions[f'Segment {seg_i}'] = decision
|
154 |
+
seg_i += 1
|
155 |
+
print(decision)
|
156 |
|
157 |
# Return figure
|
158 |
plt.xticks(rotation=90)
|
159 |
+
return fig, segment_decisions
|
videomatch.py
CHANGED
@@ -7,11 +7,13 @@ from kats.detectors.cusum_detection import CUSUMDetector
|
|
7 |
from kats.detectors.robust_stat_detection import RobustStatDetector
|
8 |
from kats.consts import TimeSeriesData
|
9 |
|
|
|
|
|
10 |
import numpy as np
|
11 |
import pandas as pd
|
12 |
|
13 |
from videohash import compute_hashes, filepath_from_url
|
14 |
-
from config import FPS, MIN_DISTANCE, MAX_DISTANCE
|
15 |
|
16 |
def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
|
17 |
""" Compute hashes of a video and index the video using faiss indices and return the index. """
|
@@ -79,6 +81,7 @@ def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
|
|
79 |
|
80 |
def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
|
81 |
tsd = TimeSeriesData(df.loc[:,['time','OFFSET_LIP']])
|
|
|
82 |
if method.upper() == "CUSUM":
|
83 |
detector = CUSUMDetector(tsd)
|
84 |
elif method.upper() == "ROBUST":
|
@@ -93,7 +96,7 @@ def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
|
|
93 |
print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
|
94 |
return change_points
|
95 |
|
96 |
-
def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
|
97 |
distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
|
98 |
_, hash_vectors = get_video_index(url)
|
99 |
target_index, _ = get_video_index(target)
|
@@ -147,6 +150,9 @@ def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
|
|
147 |
df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
|
148 |
df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
|
149 |
|
|
|
|
|
|
|
150 |
# Add time column for plotting
|
151 |
df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
|
152 |
return df
|
|
|
7 |
from kats.detectors.robust_stat_detection import RobustStatDetector
|
8 |
from kats.consts import TimeSeriesData
|
9 |
|
10 |
+
from scipy import stats as st
|
11 |
+
|
12 |
import numpy as np
|
13 |
import pandas as pd
|
14 |
|
15 |
from videohash import compute_hashes, filepath_from_url
|
16 |
+
from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
|
17 |
|
18 |
def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
|
19 |
""" Compute hashes of a video and index the video using faiss indices and return the index. """
|
|
|
81 |
|
82 |
def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
|
83 |
tsd = TimeSeriesData(df.loc[:,['time','OFFSET_LIP']])
|
84 |
+
# tsd = TimeSeriesData(df.loc[:,['time','ROLL_OFFSET_MODE']])
|
85 |
if method.upper() == "CUSUM":
|
86 |
detector = CUSUMDetector(tsd)
|
87 |
elif method.upper() == "ROBUST":
|
|
|
96 |
print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
|
97 |
return change_points
|
98 |
|
99 |
+
def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
|
100 |
distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
|
101 |
_, hash_vectors = get_video_index(url)
|
102 |
target_index, _ = get_video_index(target)
|
|
|
150 |
df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
|
151 |
df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
|
152 |
|
153 |
+
# Add rolling window mode
|
154 |
+
df['ROLL_OFFSET_MODE'] = np.round(df['OFFSET_LIP'], 0).rolling(window_size, center=True, min_periods=1).apply(lambda x: st.mode(x)[0])
|
155 |
+
|
156 |
# Add time column for plotting
|
157 |
df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
|
158 |
return df
|