Iskaj committed
Commit 9807395
1 Parent(s): ad4bd11

prepare code for multiple video comparison

Files changed (3):
  1. app.py +10 -10
  2. plot.py +3 -3
  3. videomatch.py +20 -18
app.py CHANGED
@@ -21,20 +21,20 @@ def get_comparison(url, target, MIN_DISTANCE = 4):
 
 def get_auto_comparison(url, target, smoothing_window_size=10, metric="OFFSET_LIP"):
     """ Function for Gradio to combine all helper functions"""
-    distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
-    if distance == None:
-        return None
-        raise gr.Error("No matches found!")
-    video_index, hash_vectors = get_video_index(url)
+    source_index, source_hash_vectors = get_video_index(url)
     target_index, _ = get_video_index(target)
+    distance = get_decent_distance(source_index, source_hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
+    if distance == None:
+        return _, []
+        # raise gr.Error("No matches found!")
 
     # For each video do...
     for i in range(0, 1):
-        lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
-        df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
-        change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST", metric=metric)
-        fig, segment_decisions = plot_segment_comparison(df, change_points, target)
-        return fig, segment_decisions
+        lims, D, I, hash_vectors = compare_videos(source_hash_vectors, target_index, MIN_DISTANCE = distance)
+        df = get_videomatch_df(lims, D, I, hash_vectors, distance)
+        change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, metric=metric, method="ROBUST")
+        fig, segment_decision = plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID")
+        return fig, segment_decision
 
 
 video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
plot.py CHANGED
@@ -88,7 +88,7 @@ def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID"):
     timestamps = change_points_to_segments(df, change_points)
 
     # To store "decisions" about segments
-    segment_decisions = {}
+    segment_decisions = []
     seg_i = 0
 
     # To plot the detected segment lines
@@ -142,8 +142,8 @@ def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID"):
                     "Source Video ID" : video_id,
                     "Uncertainty" : np.round(average_diff, 3),
                     "Average Offset in Seconds" : np.round(average_offset, 3),
-                    "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID '{video_id}' from {origin_start_time_str} -> {origin_end_time_str}"}
-        segment_decisions[f'Segment {seg_i}'] = decision
+                    "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID={video_id} from {origin_start_time_str} -> {origin_end_time_str}"}
+        segment_decisions.append(decision)
         seg_i += 1
         # print(decision)
 
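With segment_decisions changed from a dict to a list, plot_segment_comparison now returns one dict per detected segment, in detection order. Below is a minimal sketch of consuming that shape; the values are made up for illustration and only the keys visible in the hunk above are used (the full dict may carry additional fields).

import json

# One entry per detected segment (values fabricated for illustration; keys are from the hunk above)
segment_decisions = [
    {"Source Video ID": "Placeholder_Video_ID",
     "Uncertainty": 0.042,
     "Average Offset in Seconds": 12.5,
     "Explanation": "00:00:10 -> 00:00:25 comes from video with ID=Placeholder_Video_ID from 00:00:22 -> 00:00:37"},
]

for i, decision in enumerate(segment_decisions):
    print(f"Segment {i}: offset of {decision['Average Offset in Seconds']}s "
          f"(uncertainty {decision['Uncertainty']})")

# Unlike the old dict keyed by 'Segment {i}', the list keeps detection order and serializes directly
print(json.dumps(segment_decisions, indent=2))

Appending to a list also drops the old pattern where seg_i had to double as part of a dictionary key.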
videomatch.py CHANGED
@@ -63,14 +63,21 @@ def compare_videos(hash_vectors, target_index, MIN_DISTANCE = 3):
     lims, D, I = target_index.range_search(hash_vectors, MIN_DISTANCE)
     return lims, D, I, hash_vectors
 
-def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
+def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE):
     """ To get a decent heuristic for a base distance check every distance from MIN_DISTANCE to MAX_DISTANCE
-    until the number of matches found is equal to or higher than the number of frames in the source video"""
+    until the number of matches found is equal to or higher than the number of frames in the source video
+
+    args:
+    - video_index: The index of the source video
+    - hash_vectors: The hash vectors of the source video
+    - target_index: The index of the target video
+    """
     for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
         distance = int(distance)
-        video_index, hash_vectors = get_video_index(filepath)
-        target_index, _ = get_video_index(target)
-        lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
+        # --- Previously ---
+        # video_index, hash_vectors = get_video_index(filepath)
+        # target_index, _ = get_video_index(target)
+        _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
         nr_source_frames = video_index.ntotal
         nr_matches = len(D)
         logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
@@ -79,7 +86,7 @@ def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
     logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
     return None
 
-def get_change_points(df, smoothing_window_size=10, method='CUSUM', metric="OFFSET_LIP"):
+def get_change_points(df, smoothing_window_size=10, method='ROBUST', metric="ROLL_OFFSET_MODE"):
     tsd = TimeSeriesData(df.loc[:,['time', metric]])
     if method.upper() == "CUSUM":
         detector = CUSUMDetector(tsd)
@@ -95,11 +102,12 @@ def get_change_points(df, smoothing_window_size=10, method='CUSUM', metric="OFFSET_LIP"):
     print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
     return change_points
 
-def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
-    distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
-    _, hash_vectors = get_video_index(url)
-    target_index, _ = get_video_index(target)
-    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
+def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
+    # --- Previously ---
+    # distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
+    # _, hash_vectors = get_video_index(url)
+    # target_index, _ = get_video_index(target)
+    # lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
 
     target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
     target_s = [i/FPS for j in target for i in j]
@@ -109,14 +117,8 @@ def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
     df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
     if vanilla_df:
        return df
-
-    # Minimum distance dataframe ----
-    # Group by X so for every second/x there will be 1 value of Y in the end
-    # index_min_distance = df.groupby('TARGET_S')['DISTANCE'].idxmin()
-    # df_min = df.loc[index_min_distance]
-    # df_min
-    # -------------------------------
 
+    # Weight values by distance of their match
     df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
     df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y
 
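The comment on the last two lines describes the weighting idea: matches with a smaller hash distance get a larger TARGET_WEIGHT, so their SOURCE_S value counts for more when estimating which source second a target second maps to. Below is a toy example of that idea with made-up numbers; the closing weighted-average aggregation is one plausible reading of the "aggregate" comment, not code taken from this repo.

import pandas as pd

# Toy matches for one target second: two candidate source timestamps,
# one much closer in hash distance than the other (numbers are made up).
distance = 4  # the search radius that get_decent_distance would have picked
df = pd.DataFrame({"TARGET_S": [10.0, 10.0],
                   "SOURCE_S": [32.0, 95.0],
                   "DISTANCE": [1, 3]})

df["TARGET_WEIGHT"] = 1 - df["DISTANCE"] / distance             # 0.75 vs 0.25: closer match, higher weight
df["SOURCE_WEIGHTED_VALUE"] = df["SOURCE_S"] * df["TARGET_WEIGHT"]

# One plausible aggregation: a weighted average per target second, which pulls the
# estimate toward the stronger 32.0s match (~47.8s instead of the plain mean of 63.5s).
grouped = df.groupby("TARGET_S")
estimate = grouped["SOURCE_WEIGHTED_VALUE"].sum() / grouped["TARGET_WEIGHT"].sum()
print(estimate)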