Spaces:
Build error
Build error
File size: 7,543 Bytes
7971a7a b9cd4c4 7971a7a b9cd4c4 7971a7a 0112deb 7971a7a 0112deb 7971a7a 0112deb 7971a7a 0112deb 7971a7a 0112deb 7971a7a 0112deb 7971a7a 0112deb 7971a7a 0112deb 7971a7a b9cd4c4 0112deb b9cd4c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import os
import logging
import faiss
from kats.detectors.cusum_detection import CUSUMDetector
from kats.detectors.robust_stat_detection import RobustStatDetector
from kats.consts import TimeSeriesData
import numpy as np
import pandas as pd
from videohash import compute_hashes, filepath_from_url
from config import FPS, MIN_DISTANCE, MAX_DISTANCE
def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
    """Compute the hashes of a video, index them with faiss and return the index.

    The index is cached on disk as ``<filepath>.index``, where ``filepath`` is
    whatever ``filepath_from_url(url)`` returns. When that file already exists
    it is loaded and returned without recomputing any hashes.

    Args:
        url: Location of the video to index.

    Returns:
        The binary faiss index containing the hashes of the video.

    Raises:
        ValueError: If the computed hashes do not form a non-empty 2D array
            (for example when no hashes were produced at all).
    """
    filepath = filepath_from_url(url)
    index_path = f'{filepath}.index'

    # Reuse a previously written index instead of hashing the video again.
    if os.path.exists(index_path):
        logging.info(f"Loading indexed hashes from {index_path}")
        binary_index = faiss.read_index_binary(index_path)
        logging.info(f"Index {index_path} has in total {binary_index.ntotal} frames")
        return binary_index

    hash_vectors = np.array([x['hash'] for x in compute_hashes(url)])
    # An empty hash list yields a 1D array, which would otherwise fail below
    # with an unhelpful "tuple index out of range" on shape[1].
    if hash_vectors.ndim != 2 or hash_vectors.shape[0] == 0:
        raise ValueError(
            f"Expected a non-empty 2D array of frame hashes for {url}, "
            f"got shape {hash_vectors.shape}"
        )
    logging.info(f"Computed hashes for {hash_vectors.shape} frames.")

    # The index dimension is shape[1] * 8
    # (presumably bits, with one byte per array column - TODO confirm hash dtype is uint8).
    dimension = hash_vectors.shape[1] * 8

    # Initializing the quantizer.
    quantizer = faiss.IndexBinaryFlat(dimension)

    # Initializing index; never ask for more clusters than there are vectors.
    index = faiss.IndexBinaryIVF(quantizer, dimension, min(16, hash_vectors.shape[0]))
    index.nprobe = 1  # Number of nearest clusters to be searched per query.

    # Training the quantizer.
    index.train(hash_vectors)
    index.add(hash_vectors)

    faiss.write_index_binary(index, index_path)
    logging.info(f"Indexed hashes for {index.ntotal} frames to {index_path}.")
    return index
def get_video_index(url: str):
    """Build (or load) the faiss index of a video and recover its hash vectors.

    Args:
        url: Location of the video, forwarded to ``index_hashes_for_video``.

    Returns:
        A ``(video_index, hash_vectors)`` tuple, where ``hash_vectors`` is a
        numpy array holding the vector reconstructed from the index for every
        id in ``range(video_index.ntotal)``.
    """
    index = index_hashes_for_video(url)

    # A direct map is created first so that vectors can be reconstructed by id.
    index.make_direct_map()

    # Read the stored vectors back out of the index, one id at a time.
    frame_ids = range(index.ntotal)
    reconstructed = list(map(index.reconstruct, frame_ids))
    return index, np.array(reconstructed)
def compare_videos(hash_vectors, target_index, MIN_DISTANCE = 3):
    """Run a range search of ``hash_vectors`` against ``target_index``.

    Args:
        hash_vectors: Query hashes, passed unchanged to ``range_search``.
        target_index: Index object exposing ``range_search(queries, threshold)``.
        MIN_DISTANCE: Distance threshold handed to ``range_search``; it decides
            which hashes are considered a match.

    Returns:
        ``(lims, D, I, hash_vectors)``. The first three are the 1D arrays
        produced by ``range_search``: the matches for query ``i`` are found in
        ``I[lims[i]:lims[i+1]]`` (indices of neighbors) and
        ``D[lims[i]:lims[i+1]]`` (distances). The last element is the
        ``hash_vectors`` argument itself.
    """
    search_result = target_index.range_search(hash_vectors, MIN_DISTANCE)
    boundaries, distances, neighbor_ids = search_result
    return boundaries, distances, neighbor_ids, hash_vectors
def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
    """Find a distance threshold that yields at least one match per source frame.

    Distances are tried in steps of 2, starting at ``MIN_DISTANCE - 2`` and
    stopping before ``MAX_DISTANCE + 2``. The first distance for which the
    number of matches is equal to or higher than the number of frames in the
    source video is returned.

    Args:
        filepath: Location of the source video (queried against the target).
        target: Location of the target video whose index is searched.
        MIN_DISTANCE: Lower bound used to derive the first distance to try.
        MAX_DISTANCE: Upper bound used to derive where the search stops.

    Returns:
        The first suitable distance as an ``int``, or ``None`` when no tried
        distance produced enough matches (a warning is logged in that case).
    """
    # Both indices are independent of the distance being tried, so they are
    # built once here instead of being reloaded on every iteration.
    video_index, hash_vectors = get_video_index(filepath)
    target_index, _ = get_video_index(target)
    nr_source_frames = video_index.ntotal

    for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
        distance = int(distance)  # Plain Python int rather than a numpy scalar
        _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
        nr_matches = len(D)
        logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
        if nr_matches >= nr_source_frames:
            return distance

    logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
    return None
def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
    """Detect change points in the ``OFFSET_LIP`` column of ``df``.

    Args:
        df: DataFrame with at least the columns ``time`` and ``OFFSET_LIP``.
        smoothing_window_size: Forwarded to the detector's ``detector`` call.
        method: ``'CUSUM'`` or ``'ROBUST'`` (case-insensitive); selects
            ``CUSUMDetector`` or ``RobustStatDetector`` respectively.

    Returns:
        The change points returned by the selected detector.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    method_name = method.upper()
    # Reject unknown methods up front; previously this surfaced later as an
    # UnboundLocalError on `detector`.
    if method_name not in ("CUSUM", "ROBUST"):
        raise ValueError(
            f"Unknown change point detection method '{method}'; expected 'CUSUM' or 'ROBUST'"
        )

    tsd = TimeSeriesData(df.loc[:, ['time', 'OFFSET_LIP']])
    if method_name == "CUSUM":
        detector = CUSUMDetector(tsd)
    else:
        detector = RobustStatDetector(tsd)
    change_points = detector.detector(smoothing_window_size=smoothing_window_size, comparison_window=-2)

    # Print some stats
    if method_name == "CUSUM" and change_points:
        mean_offset_prechange = change_points[0].mu0
        mean_offset_postchange = change_points[0].mu1
        jump_s = mean_offset_postchange - mean_offset_prechange
        # NOTE(review): the value printed after "at" is the pre-change mean
        # offset, not the time of the change point - confirm this is intended.
        print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
    return change_points
def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
    """Match the frames of ``url`` against ``target`` and return them as a DataFrame.

    The hashes of ``url`` are range-searched in the index of ``target`` using
    the distance found by ``get_decent_distance``. ``TARGET_S`` holds the time
    (frame number / FPS) of the querying frame from ``url`` and ``SOURCE_S``
    the time of the matched frame in the index of ``target``.

    Args:
        url: Location of the video whose hashes are used as queries.
        target: Location of the video whose index is searched.
        min_distance: Lower bound handed to ``get_decent_distance``.
        vanilla_df: When true, return the raw match table with the columns
            ``TARGET_S``, ``SOURCE_S``, ``DISTANCE`` and ``INDICES``.

    Returns:
        Unless ``vanilla_df`` is set, a DataFrame with one row per time step of
        ``1/FPS`` and the columns ``TARGET_S``, ``SOURCE_S`` (distance-weighted
        average of the matches), ``SOURCE_LIP_S`` (linearly interpolated),
        ``TIMESHIFT``, ``TIMESHIFT_LIP``, ``OFFSET``, ``OFFSET_LIP`` and ``time``.

    Raises:
        ValueError: If ``get_decent_distance`` finds no usable distance.
    """
    distance = get_decent_distance(url, target, min_distance, MAX_DISTANCE)
    if distance is None:
        # Without a distance neither the range search nor the weighting below can work.
        raise ValueError(f"No usable match distance found between '{url}' and '{target}'")

    _, hash_vectors = get_video_index(url)
    target_index, _ = get_video_index(target)
    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)

    # Query frame i owns the matches in lims[i]:lims[i+1], so its time is
    # repeated once for each of those matches to line up with D and I.
    target_s = [
        i / FPS
        for i in range(hash_vectors.shape[0])
        for _ in range(int(lims[i + 1] - lims[i]))
    ]
    source_s = [i / FPS for i in I]

    # Make df
    df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
    if vanilla_df:
        return df

    df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance  # Higher value means a better match
    # Multiply the weight (which indicates a better match) with the value for Y
    # and aggregate to get a less noisy estimate of Y
    df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT']

    # Group by X so for every second/x there will be 1 value of Y in the end
    grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
    grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']

    # Remake the dataframe
    df = grouped_X.reset_index()
    df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
    df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')

    # Add NAN to "missing" x values (base it off hash vector, not target_s)
    # NOTE(review): rounding to one decimal presumably assumes 1/FPS is a
    # multiple of 0.1; other FPS values could make merge keys collide - confirm.
    step_size = 1/FPS
    x_complete = np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1)  # More robust
    df['TARGET_S'] = np.round(df['TARGET_S'], 1)
    df_complete = pd.DataFrame(x_complete, columns=['TARGET_S'])

    # Merge dataframes to get NAN values for every missing SOURCE_S
    df = df_complete.merge(df, on='TARGET_S', how='left')

    # Interpolate between frames since there are missing values
    df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)

    # Add timeshift col and timeshift col with Linearly Interpolated Values
    df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
    df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']

    # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
    df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
    df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])

    # Add time column for plotting
    df['time'] = pd.to_datetime(df["TARGET_S"], unit='s')  # Needs a datetime as input
    return df