Iskaj committed on
Commit 9061a2e
1 Parent(s): c9759ff

change detection to use rolling mode, add json output

Files changed (4)
  1. app.py +4 -4
  2. config.py +1 -0
  3. plot.py +47 -18
  4. videomatch.py +8 -2
app.py CHANGED
@@ -31,8 +31,8 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
     # fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
     df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
     change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
-    fig = plot_segment_comparison(df, change_points)
-    return fig
+    fig, segment_decisions = plot_segment_comparison(df, change_points)
+    return fig, segment_decisions
 
 def get_auto_edit_decision(url, target, smoothing_window_size=10):
     """ Function for Gradio to combine all helper functions"""
@@ -74,8 +74,8 @@ compare_iface = gr.Interface(fn=get_comparison,
                              examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
 
 auto_compare_iface = gr.Interface(fn=get_auto_comparison,
-                                  inputs=["text", "text", gr.Slider(1, 50, 10, step=1), gr.Dropdown(choices=["CUSUM", "Robust"], value="Robust")],
-                                  outputs="plot",
+                                  inputs=["text", "text", gr.Slider(2, 50, 10, step=1), gr.Dropdown(choices=["CUSUM", "Robust"], value="Robust")],
+                                  outputs=["plot", "json"],
                                   examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
 
 iface = gr.TabbedInterface([auto_compare_iface, compare_iface, index_iface,], ["AutoCompare", "Compare", "Index"])
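For reference, a minimal standalone sketch (not part of this commit; dummy_compare and its contents are made up) of how a Gradio interface with outputs=["plot", "json"] consumes a function that, like get_auto_comparison now does, returns a figure plus a JSON-serializable dict:

import gradio as gr
import matplotlib.pyplot as plt

def dummy_compare(url, target):
    # Stand-in for get_auto_comparison: the first return value feeds the
    # "plot" output, the second feeds the "json" output.
    fig, ax = plt.subplots()
    ax.plot([0, 1], [0, 1])
    decisions = {"Segment 0": {"Target Start Time": "00:00:00", "Target End Time": "00:00:10"}}
    return fig, decisions

demo = gr.Interface(fn=dummy_compare,
                    inputs=["text", "text"],
                    outputs=["plot", "json"])
demo.launch()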
config.py CHANGED
@@ -5,3 +5,4 @@ VIDEO_DIRECTORY = tempfile.gettempdir()
 FPS = 5
 MIN_DISTANCE = 4
 MAX_DISTANCE = 30
+ROLLING_WINDOW_SIZE = 10
plot.py CHANGED
@@ -71,32 +71,48 @@ def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
 
 def plot_segment_comparison(df, change_points):
     """ From the dataframe plot the current set of plots, where the bottom right is most indicative """
-    fig, ax_arr = plt.subplots(2, 1, figsize=(16, 6), dpi=100, sharex=True)
-    sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0])
+    fig, ax_arr = plt.subplots(3, 1, figsize=(16, 6), dpi=100, sharex=True)
+    # sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0])
+    sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[0], label="OFFSET", alpha=0.5)
     # sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
 
-    # Plot change point as lines
+    # Get rolling average offset
+    # window_size = 30
+    # df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).median()
+    # df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).apply(lambda x: st.mode(x)[0])
+    metric = 'ROLL_OFFSET_MODE' #'OFFSET'
+    sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[1], label=metric, alpha=0.5)
+
+    # Plot linearly interpolated values
+    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1], label="OFFSET_LIP")
+
+    # Plot change point as lines
+
     # sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1,0])
-    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1])
+    sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[2], label=metric, s=20)
     timestamps = change_points_to_segments(df, change_points)
 
+    segment_decisions = {}
+    seg_i = 0
+
     # To plot the detected segment lines
     for x in timestamps:
-        plt.vlines(x=x, ymin=np.min(df['OFFSET_LIP']), ymax=np.max(df['OFFSET_LIP']), colors='black', lw=2, alpha=0.5)
-        rand_y_pos = np.random.uniform(low=np.min(df['OFFSET_LIP']), high=np.max(df['OFFSET_LIP']), size=None)
+        plt.vlines(x=x, ymin=np.min(df[metric]), ymax=np.max(df[metric]), colors='black', lw=2, alpha=0.5)
 
     # To get each detected segment and their mean?
     threshold_diff = 1.5 # Average diff threshold
     # threshold = 3.0 # s diff threshold
     for start_time, end_time in zip(timestamps[:-1], timestamps[1:]):
 
+        # add_offset = df.iloc[0]['SOURCE_S'] # np.min(df['SOURCE_S'])
         add_offset = np.min(df['SOURCE_S'])
 
-        # Cut out the segment between the segment lines
+        # Cut out the segment between the segment lines
         segment = df[(df['time'] > start_time) & (df['time'] < end_time)] # Not offset LIP
-        segment_no_nan = segment[~np.isnan(segment['OFFSET'])] # Remove NaNs
-        segment_offsets = segment_no_nan['OFFSET'] # np.round(segment_no_nan['OFFSET'], 1)
+        segment_no_nan = segment[~np.isnan(segment[metric])] # Remove NaNs
+        segment_offsets = segment_no_nan[metric] # np.round(segment_no_nan['OFFSET'], 1)
         # segment_offsets = np.round(segment_no_nan['OFFSET'], 0)
+        # print(segment_offsets)
 
         # Calculate mean/median/mode
         # seg_sum_stat = np.mean(segment_offsets)
@@ -104,27 +120,40 @@ def plot_segment_comparison(df, change_points):
         seg_sum_stat = st.mode(segment_offsets)[0][0]
 
         # Get average difference from mean/median/mode of the segment to see if it is a "straight line" or not
-        average_diff = np.mean(np.abs(segment_offsets - seg_sum_stat))
+        average_diff = np.median(np.abs(segment_offsets - seg_sum_stat))
 
         # If the time where the segment comes from (origin time) is close to the start_time, it's a "good match", so no editing
         noisy = False if average_diff < threshold_diff else True
-        origin_time = add_seconds_to_datetime64(start_time, seg_sum_stat + add_offset)
+        origin_start_time = add_seconds_to_datetime64(start_time, seg_sum_stat + add_offset)
+        origin_end_time = add_seconds_to_datetime64(end_time, seg_sum_stat + add_offset)
 
         # Plot green for a confident prediction (straight line), red otherwise
        if not noisy:
            # Plot estimated straight line
-            plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color='green', lw=3, alpha=0.5)
+            plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color='green', lw=5, alpha=0.5)
             plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='green', rotation=-0.0, fontsize=14)
         else:
             # Plot estimated straight line
-            plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color='red', lw=3, alpha=0.5)
+            plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color='red', lw=5, alpha=0.5)
             plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
-
 
-
-        # print(f"DIFF={average_diff:.1f} SUMSTAT={seg_sum_stat:.1f} {start_time} -> {end_time} comes from video X, from {origin_time}")
-
+        # Decisions about segments
+        start_time_str = pd.to_datetime(start_time).strftime('%H:%M:%S')
+        end_time_str = pd.to_datetime(end_time).strftime('%H:%M:%S')
+        origin_start_time_str = pd.to_datetime(origin_start_time).strftime('%H:%M:%S')
+        origin_end_time_str = pd.to_datetime(origin_end_time).strftime('%H:%M:%S')
+        video_id = "placeholder_video_id"
+
+        decision = {"Target Start Time" : start_time_str,
+                    "Target End Time" : end_time_str,
+                    "Source Start Time" : origin_start_time_str,
+                    "Source End Time" : origin_end_time_str,
+                    "Video ID" : video_id,
+                    "Explanation" : f"{start_time_str} -> {end_time_str} comes from video {video_id} from {origin_start_time_str} -> {origin_end_time_str}"}
+        segment_decisions[f'Segment {seg_i}'] = decision
+        seg_i += 1
+        print(decision)
 
     # Return figure
     plt.xticks(rotation=90)
-    return fig
+    return fig, segment_decisions
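For clarity, this is the shape of the segment_decisions dict that the new plot_segment_comparison returns and that app.py now routes to the "json" output. The timestamps below are made up for illustration, and "Video ID" is still the placeholder from the code above:

segment_decisions = {
    "Segment 0": {
        "Target Start Time": "00:00:00",
        "Target End Time": "00:01:30",
        "Source Start Time": "00:02:10",
        "Source End Time": "00:03:40",
        "Video ID": "placeholder_video_id",
        "Explanation": "00:00:00 -> 00:01:30 comes from video placeholder_video_id from 00:02:10 -> 00:03:40"
    }
}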
videomatch.py CHANGED
@@ -7,11 +7,13 @@ from kats.detectors.cusum_detection import CUSUMDetector
 from kats.detectors.robust_stat_detection import RobustStatDetector
 from kats.consts import TimeSeriesData
 
+from scipy import stats as st
+
 import numpy as np
 import pandas as pd
 
 from videohash import compute_hashes, filepath_from_url
-from config import FPS, MIN_DISTANCE, MAX_DISTANCE
+from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
 
 def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
     """ Compute hashes of a video and index the video using faiss indices and return the index. """
@@ -79,6 +81,7 @@ def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
 
 def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
     tsd = TimeSeriesData(df.loc[:,['time','OFFSET_LIP']])
+    # tsd = TimeSeriesData(df.loc[:,['time','ROLL_OFFSET_MODE']])
     if method.upper() == "CUSUM":
         detector = CUSUMDetector(tsd)
     elif method.upper() == "ROBUST":
@@ -93,7 +96,7 @@ def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
     print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
     return change_points
 
-def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
+def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
     distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
     _, hash_vectors = get_video_index(url)
     target_index, _ = get_video_index(target)
@@ -147,6 +150,9 @@ def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
     df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
     df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
 
+    # Add rolling window mode
+    df['ROLL_OFFSET_MODE'] = np.round(df['OFFSET_LIP'], 0).rolling(window_size, center=True, min_periods=1).apply(lambda x: st.mode(x)[0])
+
     # Add time column for plotting
     df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
     return df
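To see what the new rolling-mode column does in isolation, here is a small self-contained sketch. The toy data and window_size=5 are made up (window_size stands in for ROLLING_WINDOW_SIZE), and the mode result is wrapped in float() only to make the returned scalar explicit; the construction otherwise mirrors the line added to get_videomatch_df:

import numpy as np
import pandas as pd
from scipy import stats as st

# Toy offset series: a segment around 3 s containing one stray value (7.9),
# followed by a segment around 8 s.
df = pd.DataFrame({"OFFSET_LIP": [3.1, 2.9, 3.0, 7.9, 3.2, 3.0, 2.8, 8.1, 8.0, 7.8, 8.2, 8.0]})

window_size = 5  # stands in for ROLLING_WINDOW_SIZE
# Round to whole seconds, then take the mode over a centered rolling window;
# the per-segment mode suppresses isolated outliers such as the stray 7.9.
df["ROLL_OFFSET_MODE"] = (np.round(df["OFFSET_LIP"], 0)
                          .rolling(window_size, center=True, min_periods=1)
                          .apply(lambda x: float(st.mode(x)[0])))
print(df)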