Spaces:

RTL
/

videomatch

Build error

App Files Community

Iskaj committed on Oct 4, 2022

Commit

6608d1c

1 Parent(s): 1991773

cleaned up multiple comparison code

Browse files

Files changed (3) hide show

app.py +39 -27
plot.py +8 -10
videomatch.py +7 -6

app.py CHANGED Viewed

@@ -8,8 +8,7 @@ from faiss import read_index_binary, write_index_binary
 from config import *
 from videomatch import index_hashes_for_video, get_decent_distance, \
-    get_video_index, compare_videos, get_change_points, get_videomatch_df, \
-    get_target_urls
 from plot import plot_comparison, plot_multi_comparison, plot_segment_comparison
 logging.basicConfig()
@@ -18,31 +17,33 @@ logging.getLogger().setLevel(logging.INFO)
 def transfer_data_indices_to_temp(temp_path = VIDEO_DIRECTORY, data_path='./data'):
     """ The binary indices created from the .json file are not stored in the temporary directory
     This function will load these indices and write them to the temporary directory.
-    Doing it this way reserves the way to link dynamically downloaded files and the static
     files are the same """
     index_files = os.listdir(data_path)
     for index_file in index_files:
         # Read from static location and write to temp storage
         binary_index = read_index_binary(os.path.join(data_path, index_file))
         write_index_binary(binary_index, f'{temp_path}/{index_file}')
-def get_comparison(url, target, MIN_DISTANCE = 4):
-    """ Function for Gradio to combine all helper functions"""
-    video_index, hash_vectors = get_video_index(url)
-    target_index, _ = get_video_index(target)
-    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = MIN_DISTANCE)
-    fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
-    return fig
 def compare(url, target):
     # Get source and target indices
     source_index, source_hash_vectors = get_video_index(url)
-    target_index, _ = get_video_index(target)
     # Get decent distance by comparing url index with the target hash vectors + target index
     distance = get_decent_distance(source_index, source_hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
     if distance == None:
-        logging.info(f"No matches found between {url} and {target}!")
         return plt.figure(), []
     else:
         # Compare videos with heuristic distance
@@ -55,15 +56,19 @@ def compare(url, target):
         change_points = get_change_points(df, metric="ROLL_OFFSET_MODE", method="ROBUST")
         # Plot and get figure and .json-style segment decision
-        fig, segment_decision = plot_segment_comparison(df, change_points, video_id=target)
         return fig, segment_decision
 def multiple_comparison(url, return_figure=False):
-    targets = get_target_urls()
     # Figure and decision (list of dicts) storage
     figures, decisions = [], []
-    for target in targets:
         # Make comparison
         fig, segment_decision = compare(url, target)
@@ -78,18 +83,25 @@ def multiple_comparison(url, return_figure=False):
 def plot_multiple_comparison(url):
     return multiple_comparison(url, return_figure=True)
 transfer_data_indices_to_temp() # NOTE: Only works after doing 'git lfs pull' to actually obtain the .index files
-example_video_urls = ["https://drive.google.com/uc?id=1Y1-ypXOvLrp1x0cjAe_hMobCEdA0UbEo&export=download",
-              "https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
-              "https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
-              "https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
-              "https://drive.google.com/uc?id=1XW0niHR1k09vPNv1cp6NvdGXe7FHJc1D&export=download",
-              "https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1"]
 index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
                      inputs="text",
                      outputs="text",
-                     examples=example_video_urls, cache_examples=True)
 # compare_iface = gr.Interface(fn=get_comparison,
 #                      inputs=["text", "text", gr.Slider(2, 30, 4, step=2)],
@@ -98,13 +110,13 @@ index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
 plot_compare_iface = gr.Interface(fn=plot_multiple_comparison,
                      inputs=["text"],
-                     outputs=[gr.Plot() for _ in range(len(get_target_urls()))],
-                     examples=example_video_urls)
 auto_compare_iface = gr.Interface(fn=multiple_comparison,
                      inputs=["text"],
                      outputs=["json"],
-                     examples=example_video_urls)
 iface = gr.TabbedInterface([auto_compare_iface, plot_compare_iface, index_iface], ["AutoCompare", "PlotAutoCompare", "Index"])

 from config import *
 from videomatch import index_hashes_for_video, get_decent_distance, \
+    get_video_index, compare_videos, get_change_points, get_videomatch_df
 from plot import plot_comparison, plot_multi_comparison, plot_segment_comparison
 logging.basicConfig()
 def transfer_data_indices_to_temp(temp_path = VIDEO_DIRECTORY, data_path='./data'):
     """ The binary indices created from the .json file are not stored in the temporary directory
     This function will load these indices and write them to the temporary directory.
+    Doing it this way preserves the way to link dynamically downloaded files and the static
     files are the same """
     index_files = os.listdir(data_path)
     for index_file in index_files:
         # Read from static location and write to temp storage
         binary_index = read_index_binary(os.path.join(data_path, index_file))
         write_index_binary(binary_index, f'{temp_path}/{index_file}')
 def compare(url, target):
+    """ Compare a single url (user submitted) to a single target entry and return the corresponding
+    figure and decision (.json-esque list of dictionaries)
+    args:
+    - url: User submitted url which will be downloaded and cached
+    - target: Target entry with a 'url' and 'mp4' attribute
+    """
+    target_title = target['url']
+    target_mp4 = target['mp4']
     # Get source and target indices
     source_index, source_hash_vectors = get_video_index(url)
+    target_index, _ = get_video_index(target_mp4)
     # Get decent distance by comparing url index with the target hash vectors + target index
     distance = get_decent_distance(source_index, source_hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
     if distance == None:
+        logging.info(f"No matches found between {url} and {target_mp4}!")
         return plt.figure(), []
     else:
         # Compare videos with heuristic distance
         change_points = get_change_points(df, metric="ROLL_OFFSET_MODE", method="ROBUST")
         # Plot and get figure and .json-style segment decision
+        fig, segment_decision = plot_segment_comparison(df, change_points, video_id=target_title, video_mp4=target_mp4)
         return fig, segment_decision
 def multiple_comparison(url, return_figure=False):
+    """ Compare a single url (user submitted) to all target entries and return the corresponding
+    figures and decisions (.json-style list of dictionaries)
+    args:
+    - url: User submitted url which will be downloaded and cached
+    - return_figure: Parameter to decide if to return figures or decision, needed for Gradio plotting """
     # Figure and decision (list of dicts) storage
     figures, decisions = [], []
+    for target in TARGET_ENTRIES:
         # Make comparison
         fig, segment_decision = compare(url, target)
 def plot_multiple_comparison(url):
     return multiple_comparison(url, return_figure=True)
+# Write stored target videos to temporary storage
 transfer_data_indices_to_temp() # NOTE: Only works after doing 'git lfs pull' to actually obtain the .index files
+# Load stored target videos
+with open('apb2022.json', "r") as json_file:
+    TARGET_ENTRIES = json.load(json_file)
+EXAMPLE_VIDEO_URLS = ["https://drive.google.com/uc?id=1Y1-ypXOvLrp1x0cjAe_hMobCEdA0UbEo&export=download",
+                    "https://video.twimg.com/amplify_video/1575576025651617796/vid/480x852/jP057nPfPJSUM0kR.mp4?tag=14",
+                    "https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
+                    "https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
+                    "https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
+                    "https://drive.google.com/uc?id=1XW0niHR1k09vPNv1cp6NvdGXe7FHJc1D&export=download",
+                    "https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1"]
 index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
                      inputs="text",
                      outputs="text",
+                     examples=EXAMPLE_VIDEO_URLS, cache_examples=True)
 # compare_iface = gr.Interface(fn=get_comparison,
 #                      inputs=["text", "text", gr.Slider(2, 30, 4, step=2)],
 plot_compare_iface = gr.Interface(fn=plot_multiple_comparison,
                      inputs=["text"],
+                     outputs=[gr.Plot(label=entry['url']) for entry in TARGET_ENTRIES],
+                     examples=EXAMPLE_VIDEO_URLS)
 auto_compare_iface = gr.Interface(fn=multiple_comparison,
                      inputs=["text"],
                      outputs=["json"],
+                     examples=EXAMPLE_VIDEO_URLS)
 iface = gr.TabbedInterface([auto_compare_iface, plot_compare_iface, index_iface], ["AutoCompare", "PlotAutoCompare", "Index"])

plot.py CHANGED Viewed

@@ -69,7 +69,7 @@ def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
         return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
     return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')
-def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID", threshold_diff = 1.5):
     """ Based on the dataframe and detected change points do two things:
     1. Make a decision on where each segment belongs in time and return that info as a list of dicts
     2. Plot how this decision got made as an informative plot
@@ -138,18 +138,16 @@ def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID",
             plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
         # Decisions about segments
-        start_time_str = pd.to_datetime(start_time).strftime('%H:%M:%S')
-        end_time_str = pd.to_datetime(end_time).strftime('%H:%M:%S')
-        origin_start_time_str = pd.to_datetime(origin_start_time).strftime('%H:%M:%S')
-        origin_end_time_str = pd.to_datetime(origin_end_time).strftime('%H:%M:%S')
-        decision = {"Target Start Time" : start_time_str,
-                    "Target End Time" : end_time_str,
-                    "Source Start Time" : origin_start_time_str,
-                    "Source End Time" : origin_end_time_str,
                     "Source Video ID" : video_id,
                     "Uncertainty" : np.round(average_diff, 3),
                     "Average Offset in Seconds" : np.round(average_offset, 3),
-                    "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID={video_id} from {origin_start_time_str} -> {origin_end_time_str}"}
         segment_decisions.append(decision)
         seg_i += 1
         # print(decision)

         return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
     return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')
+def plot_segment_comparison(df, change_points, video_mp4 = "Placeholder.mp4", video_id="Placeholder.videoID", threshold_diff = 1.5):
     """ Based on the dataframe and detected change points do two things:
     1. Make a decision on where each segment belongs in time and return that info as a list of dicts
     2. Plot how this decision got made as an informative plot
             plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color='red', rotation=-0.0, fontsize=14)
         # Decisions about segments
+        decision = {"Target Start Time" : pd.to_datetime(start_time).strftime('%H:%M:%S'),
+                    "Target End Time" : pd.to_datetime(end_time).strftime('%H:%M:%S'),
+                    "Source Start Time" : pd.to_datetime(origin_start_time).strftime('%H:%M:%S'),
+                    "Source End Time" : pd.to_datetime(origin_end_time).strftime('%H:%M:%S'),
                     "Source Video ID" : video_id,
+                    "Source Video .mp4" : video_mp4,
                     "Uncertainty" : np.round(average_diff, 3),
                     "Average Offset in Seconds" : np.round(average_offset, 3),
+                    # "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID={video_id} from {origin_start_time_str} -> {origin_end_time_str}",
+                    }
         segment_decisions.append(decision)
         seg_i += 1
         # print(decision)

videomatch.py CHANGED Viewed

@@ -15,11 +15,11 @@ import pandas as pd
 from videohash import compute_hashes, filepath_from_url
 from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
-def get_target_urls(json_file='apb2022.json'):
-    """ Obtain target urls for the target videos of a json file containing .mp4 files """
-    with open('apb2022.json', "r") as json_file:
-        target_videos = json.load(json_file)
-        return [video['mp4'] for video in target_videos]
 def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
     """ Compute hashes of a video and index the video using faiss indices and return the index. """
@@ -86,7 +86,8 @@ def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, M
         _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
         nr_source_frames = video_index.ntotal
         nr_matches = len(D)
-        logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
         if nr_matches >= nr_source_frames:
             return distance
     logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")

 from videohash import compute_hashes, filepath_from_url
 from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
+# def get_target_urls(json_file='apb2022.json'):
+#     """ Obtain target urls for the target videos of a json file containing .mp4 files """
+#     with open('apb2022.json', "r") as json_file:
+#         target_videos = json.load(json_file)
+#         return [video['mp4'] for video in target_videos]
 def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
     """ Compute hashes of a video and index the video using faiss indices and return the index. """
         _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
         nr_source_frames = video_index.ntotal
         nr_matches = len(D)
+        if nr_matches > 0:
+            logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
         if nr_matches >= nr_source_frames:
             return distance
     logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")