Iskaj committed on
Commit
6608d1c
1 Parent(s): 1991773

cleaned up multiple comparison code

Browse files
Files changed (3) hide show
  1. app.py +39 -27
  2. plot.py +8 -10
  3. videomatch.py +7 -6
app.py CHANGED
@@ -8,8 +8,7 @@ from faiss import read_index_binary, write_index_binary
8
 
9
  from config import *
10
  from videomatch import index_hashes_for_video, get_decent_distance, \
11
- get_video_index, compare_videos, get_change_points, get_videomatch_df, \
12
- get_target_urls
13
  from plot import plot_comparison, plot_multi_comparison, plot_segment_comparison
14
 
15
  logging.basicConfig()
@@ -18,31 +17,33 @@ logging.getLogger().setLevel(logging.INFO)
18
  def transfer_data_indices_to_temp(temp_path = VIDEO_DIRECTORY, data_path='./data'):
19
  """ The binary indices created from the .json file are not stored in the temporary directory
20
  This function will load these indices and write them to the temporary directory.
21
- Doing it this way reserves the way to link dynamically downloaded files and the static
22
  files are the same """
23
  index_files = os.listdir(data_path)
24
  for index_file in index_files:
25
  # Read from static location and write to temp storage
26
  binary_index = read_index_binary(os.path.join(data_path, index_file))
27
  write_index_binary(binary_index, f'{temp_path}/{index_file}')
28
-
29
def get_comparison(url, target, MIN_DISTANCE = 4):
    """ Function for Gradio to combine all helper functions"""
    # Build indices for both the submitted video and the target video
    source_index, source_vectors = get_video_index(url)
    target_index, _ = get_video_index(target)
    # Match source hash vectors against the target index and plot the result
    lims, D, I, source_vectors = compare_videos(source_vectors, target_index, MIN_DISTANCE = MIN_DISTANCE)
    return plot_comparison(lims, D, I, source_vectors, MIN_DISTANCE = MIN_DISTANCE)
36
 
37
  def compare(url, target):
 
 
 
 
 
 
 
 
 
 
38
  # Get source and target indices
39
  source_index, source_hash_vectors = get_video_index(url)
40
- target_index, _ = get_video_index(target)
41
 
42
  # Get decent distance by comparing url index with the target hash vectors + target index
43
  distance = get_decent_distance(source_index, source_hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
44
  if distance == None:
45
- logging.info(f"No matches found between {url} and {target}!")
46
  return plt.figure(), []
47
  else:
48
  # Compare videos with heuristic distance
@@ -55,15 +56,19 @@ def compare(url, target):
55
  change_points = get_change_points(df, metric="ROLL_OFFSET_MODE", method="ROBUST")
56
 
57
  # Plot and get figure and .json-style segment decision
58
- fig, segment_decision = plot_segment_comparison(df, change_points, video_id=target)
59
  return fig, segment_decision
60
 
61
  def multiple_comparison(url, return_figure=False):
62
- targets = get_target_urls()
63
-
 
 
 
 
64
  # Figure and decision (list of dicts) storage
65
  figures, decisions = [], []
66
- for target in targets:
67
  # Make comparison
68
  fig, segment_decision = compare(url, target)
69
 
@@ -78,18 +83,25 @@ def multiple_comparison(url, return_figure=False):
78
  def plot_multiple_comparison(url):
79
  return multiple_comparison(url, return_figure=True)
80
 
 
81
  transfer_data_indices_to_temp() # NOTE: Only works after doing 'git lfs pull' to actually obtain the .index files
82
- example_video_urls = ["https://drive.google.com/uc?id=1Y1-ypXOvLrp1x0cjAe_hMobCEdA0UbEo&export=download",
83
- "https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
84
- "https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
85
- "https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
86
- "https://drive.google.com/uc?id=1XW0niHR1k09vPNv1cp6NvdGXe7FHJc1D&export=download",
87
- "https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1"]
 
 
 
 
 
 
88
 
89
  index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
90
  inputs="text",
91
  outputs="text",
92
- examples=example_video_urls, cache_examples=True)
93
 
94
  # compare_iface = gr.Interface(fn=get_comparison,
95
  # inputs=["text", "text", gr.Slider(2, 30, 4, step=2)],
@@ -98,13 +110,13 @@ index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
98
 
99
  plot_compare_iface = gr.Interface(fn=plot_multiple_comparison,
100
  inputs=["text"],
101
- outputs=[gr.Plot() for _ in range(len(get_target_urls()))],
102
- examples=example_video_urls)
103
 
104
  auto_compare_iface = gr.Interface(fn=multiple_comparison,
105
  inputs=["text"],
106
  outputs=["json"],
107
- examples=example_video_urls)
108
 
109
  iface = gr.TabbedInterface([auto_compare_iface, plot_compare_iface, index_iface], ["AutoCompare", "PlotAutoCompare", "Index"])
110
 
 
8
 
9
  from config import *
10
  from videomatch import index_hashes_for_video, get_decent_distance, \
11
+ get_video_index, compare_videos, get_change_points, get_videomatch_df
 
12
  from plot import plot_comparison, plot_multi_comparison, plot_segment_comparison
13
 
14
  logging.basicConfig()
 
17
def transfer_data_indices_to_temp(temp_path = VIDEO_DIRECTORY, data_path='./data'):
    """ The binary indices created from the .json file are not stored in the temporary directory
    This function will load these indices and write them to the temporary directory.
    Doing it this way preserves the way to link dynamically downloaded files and the static
    files are the same """
    # Copy every prebuilt index from the static data directory into temp storage
    for index_file in os.listdir(data_path):
        # Read from static location and write to temp storage
        binary_index = read_index_binary(os.path.join(data_path, index_file))
        write_index_binary(binary_index, f'{temp_path}/{index_file}')
 
 
 
 
 
 
 
 
27
 
28
  def compare(url, target):
29
+ """ Compare a single url (user submitted) to a single target entry and return the corresponding
30
+ figure and decision (.json-esque list of dictionaries)
31
+
32
+ args:
33
+ - url: User submitted url which will be downloaded and cached
34
+ - target: Target entry with a 'url' and 'mp4' attribute
35
+ """
36
+ target_title = target['url']
37
+ target_mp4 = target['mp4']
38
+
39
  # Get source and target indices
40
  source_index, source_hash_vectors = get_video_index(url)
41
+ target_index, _ = get_video_index(target_mp4)
42
 
43
  # Get decent distance by comparing url index with the target hash vectors + target index
44
  distance = get_decent_distance(source_index, source_hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
45
  if distance == None:
46
+ logging.info(f"No matches found between {url} and {target_mp4}!")
47
  return plt.figure(), []
48
  else:
49
  # Compare videos with heuristic distance
 
56
  change_points = get_change_points(df, metric="ROLL_OFFSET_MODE", method="ROBUST")
57
 
58
  # Plot and get figure and .json-style segment decision
59
+ fig, segment_decision = plot_segment_comparison(df, change_points, video_id=target_title, video_mp4=target_mp4)
60
  return fig, segment_decision
61
 
62
  def multiple_comparison(url, return_figure=False):
63
+ """ Compare a single url (user submitted) to all target entries and return the corresponding
64
+ figures and decisions (.json-style list of dictionaries)
65
+
66
+ args:
67
+ - url: User submitted url which will be downloaded and cached
68
+ - return_figure: Parameter to decide if to return figures or decision, needed for Gradio plotting """
69
  # Figure and decision (list of dicts) storage
70
  figures, decisions = [], []
71
+ for target in TARGET_ENTRIES:
72
  # Make comparison
73
  fig, segment_decision = compare(url, target)
74
 
 
83
  def plot_multiple_comparison(url):
84
  return multiple_comparison(url, return_figure=True)
85
 
86
+ # Write stored target videos to temporary storage
87
  transfer_data_indices_to_temp() # NOTE: Only works after doing 'git lfs pull' to actually obtain the .index files
88
+
89
+ # Load stored target videos
90
+ with open('apb2022.json', "r") as json_file:
91
+ TARGET_ENTRIES = json.load(json_file)
92
+
93
+ EXAMPLE_VIDEO_URLS = ["https://drive.google.com/uc?id=1Y1-ypXOvLrp1x0cjAe_hMobCEdA0UbEo&export=download",
94
+ "https://video.twimg.com/amplify_video/1575576025651617796/vid/480x852/jP057nPfPJSUM0kR.mp4?tag=14",
95
+ "https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
96
+ "https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
97
+ "https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
98
+ "https://drive.google.com/uc?id=1XW0niHR1k09vPNv1cp6NvdGXe7FHJc1D&export=download",
99
+ "https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1"]
100
 
101
  index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
102
  inputs="text",
103
  outputs="text",
104
+ examples=EXAMPLE_VIDEO_URLS, cache_examples=True)
105
 
106
  # compare_iface = gr.Interface(fn=get_comparison,
107
  # inputs=["text", "text", gr.Slider(2, 30, 4, step=2)],
 
110
 
111
  plot_compare_iface = gr.Interface(fn=plot_multiple_comparison,
112
  inputs=["text"],
113
+ outputs=[gr.Plot(label=entry['url']) for entry in TARGET_ENTRIES],
114
+ examples=EXAMPLE_VIDEO_URLS)
115
 
116
  auto_compare_iface = gr.Interface(fn=multiple_comparison,
117
  inputs=["text"],
118
  outputs=["json"],
119
+ examples=EXAMPLE_VIDEO_URLS)
120
 
121
  iface = gr.TabbedInterface([auto_compare_iface, plot_compare_iface, index_iface], ["AutoCompare", "PlotAutoCompare", "Index"])
122
 
plot.py CHANGED
@@ -69,7 +69,7 @@ def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
69
  return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
70
  return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')
71
 
72
- def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID", threshold_diff = 1.5):
73
  """ Based on the dataframe and detected change points do two things:
74
  1. Make a decision on where each segment belongs in time and return that info as a list of dicts
75
  2. Plot how this decision got made as an informative plot
@@ -138,18 +138,16 @@ def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID",
138
  plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
139
 
140
  # Decisions about segments
141
- start_time_str = pd.to_datetime(start_time).strftime('%H:%M:%S')
142
- end_time_str = pd.to_datetime(end_time).strftime('%H:%M:%S')
143
- origin_start_time_str = pd.to_datetime(origin_start_time).strftime('%H:%M:%S')
144
- origin_end_time_str = pd.to_datetime(origin_end_time).strftime('%H:%M:%S')
145
- decision = {"Target Start Time" : start_time_str,
146
- "Target End Time" : end_time_str,
147
- "Source Start Time" : origin_start_time_str,
148
- "Source End Time" : origin_end_time_str,
149
  "Source Video ID" : video_id,
 
150
  "Uncertainty" : np.round(average_diff, 3),
151
  "Average Offset in Seconds" : np.round(average_offset, 3),
152
- "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID={video_id} from {origin_start_time_str} -> {origin_end_time_str}"}
 
153
  segment_decisions.append(decision)
154
  seg_i += 1
155
  # print(decision)
 
69
  return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
70
  return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')
71
 
72
+ def plot_segment_comparison(df, change_points, video_mp4 = "Placeholder.mp4", video_id="Placeholder.videoID", threshold_diff = 1.5):
73
  """ Based on the dataframe and detected change points do two things:
74
  1. Make a decision on where each segment belongs in time and return that info as a list of dicts
75
  2. Plot how this decision got made as an informative plot
 
138
  plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
139
 
140
  # Decisions about segments
141
+ decision = {"Target Start Time" : pd.to_datetime(start_time).strftime('%H:%M:%S'),
142
+ "Target End Time" : pd.to_datetime(end_time).strftime('%H:%M:%S'),
143
+ "Source Start Time" : pd.to_datetime(origin_start_time).strftime('%H:%M:%S'),
144
+ "Source End Time" : pd.to_datetime(origin_end_time).strftime('%H:%M:%S'),
 
 
 
 
145
  "Source Video ID" : video_id,
146
+ "Source Video .mp4" : video_mp4,
147
  "Uncertainty" : np.round(average_diff, 3),
148
  "Average Offset in Seconds" : np.round(average_offset, 3),
149
+ # "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID={video_id} from {origin_start_time_str} -> {origin_end_time_str}",
150
+ }
151
  segment_decisions.append(decision)
152
  seg_i += 1
153
  # print(decision)
videomatch.py CHANGED
@@ -15,11 +15,11 @@ import pandas as pd
15
  from videohash import compute_hashes, filepath_from_url
16
  from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
17
 
18
def get_target_urls(json_file='apb2022.json'):
    """ Obtain target urls for the target videos of a json file containing .mp4 files

    args:
        - json_file: path to a .json file holding a list of entries, each with an 'mp4' key

    returns:
        - list of .mp4 urls, one per target video entry
    """
    # Bug fix: the original ignored the json_file argument (hard-coded 'apb2022.json')
    # and shadowed the parameter with the file handle; use the parameter instead.
    with open(json_file, "r") as f:
        target_videos = json.load(f)
    return [video['mp4'] for video in target_videos]
23
 
24
  def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
25
  """ Compute hashes of a video and index the video using faiss indices and return the index. """
@@ -86,7 +86,8 @@ def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, M
86
  _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
87
  nr_source_frames = video_index.ntotal
88
  nr_matches = len(D)
89
- logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
 
90
  if nr_matches >= nr_source_frames:
91
  return distance
92
  logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
 
15
  from videohash import compute_hashes, filepath_from_url
16
  from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
17
 
18
+ # def get_target_urls(json_file='apb2022.json'):
19
+ # """ Obtain target urls for the target videos of a json file containing .mp4 files """
20
+ # with open('apb2022.json', "r") as json_file:
21
+ # target_videos = json.load(json_file)
22
+ # return [video['mp4'] for video in target_videos]
23
 
24
  def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
25
  """ Compute hashes of a video and index the video using faiss indices and return the index. """
 
86
  _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
87
  nr_source_frames = video_index.ntotal
88
  nr_matches = len(D)
89
+ if nr_matches > 0:
90
+ logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
91
  if nr_matches >= nr_source_frames:
92
  return distance
93
  logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")