Spaces:
Build error
Build error
Iskaj
committed on
Commit
•
6608d1c
1
Parent(s):
1991773
cleaned up multiple comparison code
Browse files
- app.py +39 -27
- plot.py +8 -10
- videomatch.py +7 -6
app.py
CHANGED
@@ -8,8 +8,7 @@ from faiss import read_index_binary, write_index_binary
|
|
8 |
|
9 |
from config import *
|
10 |
from videomatch import index_hashes_for_video, get_decent_distance, \
|
11 |
-
get_video_index, compare_videos, get_change_points, get_videomatch_df
|
12 |
-
get_target_urls
|
13 |
from plot import plot_comparison, plot_multi_comparison, plot_segment_comparison
|
14 |
|
15 |
logging.basicConfig()
|
@@ -18,31 +17,33 @@ logging.getLogger().setLevel(logging.INFO)
|
|
18 |
def transfer_data_indices_to_temp(temp_path = VIDEO_DIRECTORY, data_path='./data'):
|
19 |
""" The binary indices created from the .json file are not stored in the temporary directory
|
20 |
This function will load these indices and write them to the temporary directory.
|
21 |
-
Doing it this way
|
22 |
files are the same """
|
23 |
index_files = os.listdir(data_path)
|
24 |
for index_file in index_files:
|
25 |
# Read from static location and write to temp storage
|
26 |
binary_index = read_index_binary(os.path.join(data_path, index_file))
|
27 |
write_index_binary(binary_index, f'{temp_path}/{index_file}')
|
28 |
-
|
29 |
-
def get_comparison(url, target, MIN_DISTANCE = 4):
|
30 |
-
""" Function for Gradio to combine all helper functions"""
|
31 |
-
video_index, hash_vectors = get_video_index(url)
|
32 |
-
target_index, _ = get_video_index(target)
|
33 |
-
lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = MIN_DISTANCE)
|
34 |
-
fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
|
35 |
-
return fig
|
36 |
|
37 |
def compare(url, target):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# Get source and target indices
|
39 |
source_index, source_hash_vectors = get_video_index(url)
|
40 |
-
target_index, _ = get_video_index(
|
41 |
|
42 |
# Get decent distance by comparing url index with the target hash vectors + target index
|
43 |
distance = get_decent_distance(source_index, source_hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
|
44 |
if distance == None:
|
45 |
-
logging.info(f"No matches found between {url} and {
|
46 |
return plt.figure(), []
|
47 |
else:
|
48 |
# Compare videos with heuristic distance
|
@@ -55,15 +56,19 @@ def compare(url, target):
|
|
55 |
change_points = get_change_points(df, metric="ROLL_OFFSET_MODE", method="ROBUST")
|
56 |
|
57 |
# Plot and get figure and .json-style segment decision
|
58 |
-
fig, segment_decision = plot_segment_comparison(df, change_points, video_id=
|
59 |
return fig, segment_decision
|
60 |
|
61 |
def multiple_comparison(url, return_figure=False):
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
64 |
# Figure and decision (list of dicts) storage
|
65 |
figures, decisions = [], []
|
66 |
-
for target in
|
67 |
# Make comparison
|
68 |
fig, segment_decision = compare(url, target)
|
69 |
|
@@ -78,18 +83,25 @@ def multiple_comparison(url, return_figure=False):
|
|
78 |
def plot_multiple_comparison(url):
|
79 |
return multiple_comparison(url, return_figure=True)
|
80 |
|
|
|
81 |
transfer_data_indices_to_temp() # NOTE: Only works after doing 'git lfs pull' to actually obtain the .index files
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
|
90 |
inputs="text",
|
91 |
outputs="text",
|
92 |
-
examples=
|
93 |
|
94 |
# compare_iface = gr.Interface(fn=get_comparison,
|
95 |
# inputs=["text", "text", gr.Slider(2, 30, 4, step=2)],
|
@@ -98,13 +110,13 @@ index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
|
|
98 |
|
99 |
plot_compare_iface = gr.Interface(fn=plot_multiple_comparison,
|
100 |
inputs=["text"],
|
101 |
-
outputs=[gr.Plot() for
|
102 |
-
examples=
|
103 |
|
104 |
auto_compare_iface = gr.Interface(fn=multiple_comparison,
|
105 |
inputs=["text"],
|
106 |
outputs=["json"],
|
107 |
-
examples=
|
108 |
|
109 |
iface = gr.TabbedInterface([auto_compare_iface, plot_compare_iface, index_iface], ["AutoCompare", "PlotAutoCompare", "Index"])
|
110 |
|
|
|
8 |
|
9 |
from config import *
|
10 |
from videomatch import index_hashes_for_video, get_decent_distance, \
|
11 |
+
get_video_index, compare_videos, get_change_points, get_videomatch_df
|
|
|
12 |
from plot import plot_comparison, plot_multi_comparison, plot_segment_comparison
|
13 |
|
14 |
logging.basicConfig()
|
|
|
17 |
def transfer_data_indices_to_temp(temp_path = VIDEO_DIRECTORY, data_path='./data'):
|
18 |
""" The binary indices created from the .json file are not stored in the temporary directory
|
19 |
This function will load these indices and write them to the temporary directory.
|
20 |
+
Doing it this way preserves the way to link dynamically downloaded files and the static
|
21 |
files are the same """
|
22 |
index_files = os.listdir(data_path)
|
23 |
for index_file in index_files:
|
24 |
# Read from static location and write to temp storage
|
25 |
binary_index = read_index_binary(os.path.join(data_path, index_file))
|
26 |
write_index_binary(binary_index, f'{temp_path}/{index_file}')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
def compare(url, target):
|
29 |
+
""" Compare a single url (user submitted) to a single target entry and return the corresponding
|
30 |
+
figure and decision (.json-esque list of dictionaries)
|
31 |
+
|
32 |
+
args:
|
33 |
+
- url: User submitted url which will be downloaded and cached
|
34 |
+
- target: Target entry with a 'url' and 'mp4' attribute
|
35 |
+
"""
|
36 |
+
target_title = target['url']
|
37 |
+
target_mp4 = target['mp4']
|
38 |
+
|
39 |
# Get source and target indices
|
40 |
source_index, source_hash_vectors = get_video_index(url)
|
41 |
+
target_index, _ = get_video_index(target_mp4)
|
42 |
|
43 |
# Get decent distance by comparing url index with the target hash vectors + target index
|
44 |
distance = get_decent_distance(source_index, source_hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
|
45 |
if distance == None:
|
46 |
+
logging.info(f"No matches found between {url} and {target_mp4}!")
|
47 |
return plt.figure(), []
|
48 |
else:
|
49 |
# Compare videos with heuristic distance
|
|
|
56 |
change_points = get_change_points(df, metric="ROLL_OFFSET_MODE", method="ROBUST")
|
57 |
|
58 |
# Plot and get figure and .json-style segment decision
|
59 |
+
fig, segment_decision = plot_segment_comparison(df, change_points, video_id=target_title, video_mp4=target_mp4)
|
60 |
return fig, segment_decision
|
61 |
|
62 |
def multiple_comparison(url, return_figure=False):
|
63 |
+
""" Compare a single url (user submitted) to all target entries and return the corresponding
|
64 |
+
figures and decisions (.json-style list of dictionaries)
|
65 |
+
|
66 |
+
args:
|
67 |
+
- url: User submitted url which will be downloaded and cached
|
68 |
+
- return_figure: Parameter to decide if to return figures or decision, needed for Gradio plotting """
|
69 |
# Figure and decision (list of dicts) storage
|
70 |
figures, decisions = [], []
|
71 |
+
for target in TARGET_ENTRIES:
|
72 |
# Make comparison
|
73 |
fig, segment_decision = compare(url, target)
|
74 |
|
|
|
83 |
def plot_multiple_comparison(url):
|
84 |
return multiple_comparison(url, return_figure=True)
|
85 |
|
86 |
+
# Write stored target videos to temporary storage
|
87 |
transfer_data_indices_to_temp() # NOTE: Only works after doing 'git lfs pull' to actually obtain the .index files
|
88 |
+
|
89 |
+
# Load stored target videos
|
90 |
+
with open('apb2022.json', "r") as json_file:
|
91 |
+
TARGET_ENTRIES = json.load(json_file)
|
92 |
+
|
93 |
+
EXAMPLE_VIDEO_URLS = ["https://drive.google.com/uc?id=1Y1-ypXOvLrp1x0cjAe_hMobCEdA0UbEo&export=download",
|
94 |
+
"https://video.twimg.com/amplify_video/1575576025651617796/vid/480x852/jP057nPfPJSUM0kR.mp4?tag=14",
|
95 |
+
"https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
|
96 |
+
"https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
|
97 |
+
"https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
|
98 |
+
"https://drive.google.com/uc?id=1XW0niHR1k09vPNv1cp6NvdGXe7FHJc1D&export=download",
|
99 |
+
"https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1"]
|
100 |
|
101 |
index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
|
102 |
inputs="text",
|
103 |
outputs="text",
|
104 |
+
examples=EXAMPLE_VIDEO_URLS, cache_examples=True)
|
105 |
|
106 |
# compare_iface = gr.Interface(fn=get_comparison,
|
107 |
# inputs=["text", "text", gr.Slider(2, 30, 4, step=2)],
|
|
|
110 |
|
111 |
plot_compare_iface = gr.Interface(fn=plot_multiple_comparison,
|
112 |
inputs=["text"],
|
113 |
+
outputs=[gr.Plot(label=entry['url']) for entry in TARGET_ENTRIES],
|
114 |
+
examples=EXAMPLE_VIDEO_URLS)
|
115 |
|
116 |
auto_compare_iface = gr.Interface(fn=multiple_comparison,
|
117 |
inputs=["text"],
|
118 |
outputs=["json"],
|
119 |
+
examples=EXAMPLE_VIDEO_URLS)
|
120 |
|
121 |
iface = gr.TabbedInterface([auto_compare_iface, plot_compare_iface, index_iface], ["AutoCompare", "PlotAutoCompare", "Index"])
|
122 |
|
plot.py
CHANGED
@@ -69,7 +69,7 @@ def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
|
|
69 |
return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
|
70 |
return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')
|
71 |
|
72 |
-
def plot_segment_comparison(df, change_points, video_id="
|
73 |
""" Based on the dataframe and detected change points do two things:
|
74 |
1. Make a decision on where each segment belongs in time and return that info as a list of dicts
|
75 |
2. Plot how this decision got made as an informative plot
|
@@ -138,18 +138,16 @@ def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID",
|
|
138 |
plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
|
139 |
|
140 |
# Decisions about segments
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
decision = {"Target Start Time" : start_time_str,
|
146 |
-
"Target End Time" : end_time_str,
|
147 |
-
"Source Start Time" : origin_start_time_str,
|
148 |
-
"Source End Time" : origin_end_time_str,
|
149 |
"Source Video ID" : video_id,
|
|
|
150 |
"Uncertainty" : np.round(average_diff, 3),
|
151 |
"Average Offset in Seconds" : np.round(average_offset, 3),
|
152 |
-
"Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID={video_id} from {origin_start_time_str} -> {origin_end_time_str}"
|
|
|
153 |
segment_decisions.append(decision)
|
154 |
seg_i += 1
|
155 |
# print(decision)
|
|
|
69 |
return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
|
70 |
return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')
|
71 |
|
72 |
+
def plot_segment_comparison(df, change_points, video_mp4 = "Placeholder.mp4", video_id="Placeholder.videoID", threshold_diff = 1.5):
|
73 |
""" Based on the dataframe and detected change points do two things:
|
74 |
1. Make a decision on where each segment belongs in time and return that info as a list of dicts
|
75 |
2. Plot how this decision got made as an informative plot
|
|
|
138 |
plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
|
139 |
|
140 |
# Decisions about segments
|
141 |
+
decision = {"Target Start Time" : pd.to_datetime(start_time).strftime('%H:%M:%S'),
|
142 |
+
"Target End Time" : pd.to_datetime(end_time).strftime('%H:%M:%S'),
|
143 |
+
"Source Start Time" : pd.to_datetime(origin_start_time).strftime('%H:%M:%S'),
|
144 |
+
"Source End Time" : pd.to_datetime(origin_end_time).strftime('%H:%M:%S'),
|
|
|
|
|
|
|
|
|
145 |
"Source Video ID" : video_id,
|
146 |
+
"Source Video .mp4" : video_mp4,
|
147 |
"Uncertainty" : np.round(average_diff, 3),
|
148 |
"Average Offset in Seconds" : np.round(average_offset, 3),
|
149 |
+
# "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID={video_id} from {origin_start_time_str} -> {origin_end_time_str}",
|
150 |
+
}
|
151 |
segment_decisions.append(decision)
|
152 |
seg_i += 1
|
153 |
# print(decision)
|
videomatch.py
CHANGED
@@ -15,11 +15,11 @@ import pandas as pd
|
|
15 |
from videohash import compute_hashes, filepath_from_url
|
16 |
from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
|
17 |
|
18 |
-
def get_target_urls(json_file='apb2022.json'):
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
|
24 |
def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
|
25 |
""" Compute hashes of a video and index the video using faiss indices and return the index. """
|
@@ -86,7 +86,8 @@ def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, M
|
|
86 |
_, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
|
87 |
nr_source_frames = video_index.ntotal
|
88 |
nr_matches = len(D)
|
89 |
-
|
|
|
90 |
if nr_matches >= nr_source_frames:
|
91 |
return distance
|
92 |
logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
|
|
|
15 |
from videohash import compute_hashes, filepath_from_url
|
16 |
from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
|
17 |
|
18 |
+
# def get_target_urls(json_file='apb2022.json'):
|
19 |
+
# """ Obtain target urls for the target videos of a json file containing .mp4 files """
|
20 |
+
# with open('apb2022.json', "r") as json_file:
|
21 |
+
# target_videos = json.load(json_file)
|
22 |
+
# return [video['mp4'] for video in target_videos]
|
23 |
|
24 |
def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
|
25 |
""" Compute hashes of a video and index the video using faiss indices and return the index. """
|
|
|
86 |
_, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
|
87 |
nr_source_frames = video_index.ntotal
|
88 |
nr_matches = len(D)
|
89 |
+
if nr_matches > 0:
|
90 |
+
logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
|
91 |
if nr_matches >= nr_source_frames:
|
92 |
return distance
|
93 |
logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
|