Iskaj committed on
Commit
879e657
1 Parent(s): 9061a2e

minor changes to code style and comments

Files changed (4)
  1. app.py +12 -29
  2. plot.py +19 -26
  3. videohash.py +1 -0
  4. videomatch.py +2 -3
app.py CHANGED
@@ -19,7 +19,7 @@ def get_comparison(url, target, MIN_DISTANCE = 4):
     fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
     return fig

-def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
+def get_auto_comparison(url, target, smoothing_window_size=10, metric="OFFSET_LIP"):
     """ Function for Gradio to combine all helper functions"""
     distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
     if distance == None:
@@ -27,34 +27,14 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
         raise gr.Error("No matches found!")
     video_index, hash_vectors = get_video_index(url)
     target_index, _ = get_video_index(target)
-    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
-    # fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
-    df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
-    change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
-    fig, segment_decisions = plot_segment_comparison(df, change_points)
-    return fig, segment_decisions
-
-def get_auto_edit_decision(url, target, smoothing_window_size=10):
-    """ Function for Gradio to combine all helper functions"""
-    distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
-    if distance == None:
-        return None
-        raise gr.Error("No matches found!")
-    video_index, hash_vectors = get_video_index(url)
-    target_index, _ = get_video_index(target)
-    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
-
-    df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
-    change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST")
-    edit_decision_list = []
-    for cp in change_points:
-        decision = f"Video at time {cp.start_time} returns {cp.metric}"
-        # edit_decision_list.append(f"Video at time {cp.start_time} returns {cp.metric}")
-
-
-    fig = plot_multi_comparison(df, change_points)
-    return fig
-
+
+    # For each video do...
+    for i in range(0, 1):
+        lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
+        df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
+        change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST", metric=metric)
+        fig, segment_decisions = plot_segment_comparison(df, change_points, target)
+    return fig, segment_decisions


 video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
@@ -74,7 +54,10 @@ compare_iface = gr.Interface(fn=get_comparison,
                              examples=[[x, video_urls[-1]] for x in video_urls[:-1]])

 auto_compare_iface = gr.Interface(fn=get_auto_comparison,
-                                  inputs=["text", "text", gr.Slider(2, 50, 10, step=1), gr.Dropdown(choices=["CUSUM", "Robust"], value="Robust")],
+                                  inputs=["text",
+                                          "text",
+                                          gr.Slider(2, 50, 10, step=1),
+                                          gr.Dropdown(choices=["OFFSET_LIP", "ROLL_OFFSET_MODE"], value="OFFSET_LIP")],
                                   outputs=["plot", "json"],
                                   examples=[[x, video_urls[-1]] for x in video_urls[:-1]])

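A quick sketch of how the reworked get_auto_comparison would be exercised from inside app.py. This is an illustration only: it assumes the module-level video_urls list and MIN_DISTANCE/MAX_DISTANCE constants that the existing code already defines, and the metric values are just the two choices exposed by the new Dropdown.

    # Illustrative call, mirroring the Gradio wiring above (not part of this commit)
    url = video_urls[0]        # source video to check
    target = video_urls[-1]    # reference video, as in the Gradio examples
    fig, segment_decisions = get_auto_comparison(url, target,
                                                 smoothing_window_size=10,
                                                 metric="OFFSET_LIP")  # or "ROLL_OFFSET_MODE"
    print(segment_decisions)   # dict rendered by the "json" output component
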
plot.py CHANGED
@@ -69,29 +69,25 @@ def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
         return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
     return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')

-def plot_segment_comparison(df, change_points):
+def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID"):
     """ From the dataframe plot the current set of plots, where the bottom right is most indicative """
     fig, ax_arr = plt.subplots(3, 1, figsize=(16, 6), dpi=100, sharex=True)
-    # sns.scatterplot(data = df, x='time', y='SOURCE_S', ax=ax_arr[0])
+
+    # Plot original datapoints without linear interpolation, offset by target video time
     sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[0], label="OFFSET", alpha=0.5)
-    # sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
-
-    # Get rolling average offset
-    # window_size = 30
-    # df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).median()
-    # df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).apply(lambda x: st.mode(x)[0])
-    metric = 'ROLL_OFFSET_MODE' #'OFFSET'
-    sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[1], label=metric, alpha=0.5)

     # Plot linearly interpolated values
-    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1], label="OFFSET_LIP")
+    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1], label="OFFSET_LIP", color='orange')

-    # Plot change point as lines
-
-    # sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1,0])
+    # Plot our target metric
+    metric = 'ROLL_OFFSET_MODE' # 'OFFSET'
+    sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[1], label=metric, alpha=0.5)
+
+    # Plot detected change points as lines which will indicate the segments
     sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[2], label=metric, s=20)
     timestamps = change_points_to_segments(df, change_points)

+    # To store "decisions" about segments
     segment_decisions = {}
     seg_i = 0

@@ -99,20 +95,16 @@ def plot_segment_comparison(df, change_points):
     for x in timestamps:
         plt.vlines(x=x, ymin=np.min(df[metric]), ymax=np.max(df[metric]), colors='black', lw=2, alpha=0.5)

-    # To get each detected segment and their mean?
-    threshold_diff = 1.5 # Average diff threshold
-    # threshold = 3.0 # s diff threshold
+    threshold_diff = 1.5 # Average segment difference threshold for plotting
     for start_time, end_time in zip(timestamps[:-1], timestamps[1:]):

-        # add_offset = df.iloc[0]['SOURCE_S'] # np.min(df['SOURCE_S'])
+        # Time to add to each origin time to get the correct time back since it is offset by add_offset
         add_offset = np.min(df['SOURCE_S'])

         # Cut out the segment between the segment lines
         segment = df[(df['time'] > start_time) & (df['time'] < end_time)] # Not offset LIP
         segment_no_nan = segment[~np.isnan(segment[metric])] # Remove NaNs
         segment_offsets = segment_no_nan[metric] # np.round(segment_no_nan['OFFSET'], 1)
-        # segment_offsets = np.round(segment_no_nan['OFFSET'], 0)
-        # print(segment_offsets)

         # Calculate mean/median/mode
         # seg_sum_stat = np.mean(segment_offsets)
@@ -120,7 +112,8 @@ def plot_segment_comparison(df, change_points):
         seg_sum_stat = st.mode(segment_offsets)[0][0]

         # Get average difference from mean/median/mode of the segment to see if it is a "straight line" or not
-        average_diff = np.median(np.abs(segment_offsets - seg_sum_stat))
+        average_diff = np.median(np.abs(segment_no_nan['OFFSET_LIP'] - seg_sum_stat))
+        average_offset = np.mean(segment_no_nan['OFFSET_LIP'])

         # If the time where the segment comes from (origin time) is close to the start_time, it's a "good match", so no editing
         noisy = False if average_diff < threshold_diff else True
@@ -142,17 +135,17 @@ def plot_segment_comparison(df, change_points):
         end_time_str = pd.to_datetime(end_time).strftime('%H:%M:%S')
         origin_start_time_str = pd.to_datetime(origin_start_time).strftime('%H:%M:%S')
         origin_end_time_str = pd.to_datetime(origin_end_time).strftime('%H:%M:%S')
-        video_id = "placeholder_video_id"
-
         decision = {"Target Start Time" : start_time_str,
                     "Target End Time" : end_time_str,
                     "Source Start Time" : origin_start_time_str,
                     "Source End Time" : origin_end_time_str,
-                    "Video ID" : video_id,
-                    "Explanation" : f"{start_time_str} -> {end_time_str} comes from video {video_id} from {origin_start_time_str} -> {origin_end_time_str}"}
+                    "Source Video ID" : video_id,
+                    "Uncertainty" : np.round(average_diff, 3),
+                    "Average Offset in Seconds" : np.round(average_offset, 3),
+                    "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID '{video_id}' from {origin_start_time_str} -> {origin_end_time_str}"}
         segment_decisions[f'Segment {seg_i}'] = decision
         seg_i += 1
-        print(decision)
+        # print(decision)

     # Return figure
     plt.xticks(rotation=90)
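Side note on the segment statistics above: each segment is summarised by the mode of its offsets, and the "Uncertainty" field is the median absolute difference from that summary value. A minimal, self-contained toy sketch (numbers invented, single array used for both roles; plot.py itself uses the older st.mode(...)[0][0] indexing, while keepdims=True is passed here so the same indexing works on newer SciPy):

    import numpy as np
    import scipy.stats as st

    # Toy per-frame offsets (seconds) for one detected segment
    segment_offsets = np.array([4.0, 4.0, 4.1, 4.0, 3.9, 4.0, 7.5])

    seg_sum_stat = st.mode(segment_offsets, keepdims=True)[0][0]      # mode of the segment -> 4.0
    average_diff = np.median(np.abs(segment_offsets - seg_sum_stat))  # robust spread, the "Uncertainty" value
    average_offset = np.mean(segment_offsets)                         # "Average Offset in Seconds"

    threshold_diff = 1.5                      # same threshold as in plot_segment_comparison
    noisy = average_diff >= threshold_diff    # a noisy segment is treated as a weaker match
    print(seg_sum_stat, average_diff, average_offset, noisy)
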
videohash.py CHANGED
@@ -18,6 +18,7 @@ def filepath_from_url(url):

 def download_video_from_url(url):
     """Download video from url or return md5 hash as video name"""
+    # TODO: Make work for Google link
     filepath = filepath_from_url(url)
     if not os.path.exists(filepath):
         with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
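On the new TODO: one possible (untested, assumed) direction for "Google link" support would be to rewrite a Google Drive share link into Drive's direct-download form before handing it to urllib, e.g.:

    import re

    def gdrive_to_direct_url(url):
        """Hypothetical helper (not part of this commit): turn a Google Drive
        share link into its direct-download form; other URLs pass through."""
        m = re.search(r"drive\.google\.com/file/d/([^/]+)", url)
        if m:
            return f"https://drive.google.com/uc?export=download&id={m.group(1)}"
        return url

download_video_from_url could apply this to its url argument before urllib.request.urlopen; large Drive files additionally require a confirmation token, which this sketch ignores.
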
videomatch.py CHANGED
@@ -79,9 +79,8 @@ def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
     logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
     return None

-def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
-    tsd = TimeSeriesData(df.loc[:,['time','OFFSET_LIP']])
-    # tsd = TimeSeriesData(df.loc[:,['time','ROLL_OFFSET_MODE']])
+def get_change_points(df, smoothing_window_size=10, method='CUSUM', metric="OFFSET_LIP"):
+    tsd = TimeSeriesData(df.loc[:,['time', metric]])
     if method.upper() == "CUSUM":
         detector = CUSUMDetector(tsd)
     elif method.upper() == "ROBUST":
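With the new metric argument, callers can point the Kats change-point detection at a different column of the dataframe. A small usage sketch, assuming df comes from get_videomatch_df as app.py does and contains 'time', 'OFFSET_LIP' and 'ROLL_OFFSET_MODE' columns:

    # Default behaviour is unchanged: CUSUM on the linearly interpolated offsets
    change_points = get_change_points(df, smoothing_window_size=10)

    # New in this commit: run the robust detector on the rolling-mode offset instead,
    # matching the "ROLL_OFFSET_MODE" choice exposed in the Gradio Dropdown
    change_points = get_change_points(df, smoothing_window_size=10,
                                      method="ROBUST", metric="ROLL_OFFSET_MODE")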