dodijk committed on
Commit
7971a7a
1 Parent(s): 48017c0

Refactor app.py into separate files

Browse files
Files changed (4) hide show
  1. app.py +12 -177
  2. config.py +7 -0
  3. videohash.py +57 -0
  4. videomatch.py +100 -0
app.py CHANGED
@@ -1,166 +1,23 @@
1
- import tempfile
2
- import urllib.request
3
  import logging
4
- import os
5
- import hashlib
6
- import datetime
7
  import time
8
 
9
  import pandas
10
  import gradio as gr
11
- from moviepy.editor import VideoFileClip
12
 
13
  import seaborn as sns
14
  import matplotlib.pyplot as plt
15
 
16
- import imagehash
17
- from PIL import Image
18
-
19
  import numpy as np
20
  import pandas as pd
21
- import faiss
22
-
23
- import shutil
24
-
25
- from kats.detectors.cusum_detection import CUSUMDetector
26
- from kats.detectors.robust_stat_detection import RobustStatDetector
27
- from kats.consts import TimeSeriesData
28
-
29
- FPS = 5
30
- MIN_DISTANCE = 4
31
- MAX_DISTANCE = 30
32
-
33
- video_directory = tempfile.gettempdir()
34
-
35
- def move_video_to_tempdir(input_dir, filename):
36
- new_filename = os.path.join(video_directory, filename)
37
- input_file = os.path.join(input_dir, filename)
38
- if not os.path.exists(new_filename):
39
- shutil.copyfile(input_file, new_filename)
40
- logging.info(f"Copied {input_file} to {new_filename}.")
41
- else:
42
- logging.info(f"Skipping copying from {input_file} because {new_filename} already exists.")
43
- return new_filename
44
-
45
- def download_video_from_url(url):
46
- """Download video from url or return md5 hash as video name"""
47
- filename = filename_from_url(url)
48
- if not os.path.exists(filename):
49
- with (urllib.request.urlopen(url)) as f, open(filename, 'wb') as fileout:
50
- fileout.write(f.read())
51
- logging.info(f"Downloaded video from {url} to {filename}.")
52
- else:
53
- logging.info(f"Skipping downloading from {url} because {filename} already exists.")
54
- return filename
55
-
56
- def change_ffmpeg_fps(clip, fps=FPS):
57
- # Hacking the ffmpeg call based on
58
- # https://github.com/Zulko/moviepy/blob/master/moviepy/video/io/ffmpeg_reader.py#L126
59
- import subprocess as sp
60
-
61
- cmd = [arg + ",fps=%d" % fps if arg.startswith("scale=") else arg for arg in clip.reader.proc.args]
62
- clip.reader.close()
63
- clip.reader.proc = sp.Popen(cmd, bufsize=clip.reader.bufsize,
64
- stdout=sp.PIPE, stderr=sp.PIPE, stdin=sp.DEVNULL)
65
- clip.fps = clip.reader.fps = fps
66
- clip.reader.lastread = clip.reader.read_frame()
67
- return clip
68
-
69
- def compute_hash(frame, hash_size=16):
70
- image = Image.fromarray(np.array(frame))
71
- return imagehash.phash(image, hash_size)
72
-
73
- def binary_array_to_uint8s(arr):
74
- bit_string = ''.join(str(1 * x) for l in arr for x in l)
75
- return [int(bit_string[i:i+8], 2) for i in range(0, len(bit_string), 8)]
76
-
77
- def compute_hashes(clip, fps=FPS):
78
- for index, frame in enumerate(change_ffmpeg_fps(clip, fps).iter_frames()):
79
- # Each frame is a triplet of size (height, width, 3) of the video since it is RGB
80
- # The hash itself is of size (hash_size, hash_size)
81
- # The uint8 version of the hash is of size (hash_size * highfreq_factor,) and represents the hash
82
- hashed = np.array(binary_array_to_uint8s(compute_hash(frame).hash), dtype='uint8')
83
- yield {"frame": 1+index*fps, "hash": hashed}
84
-
85
- def index_hashes_for_video(url, is_file = False):
86
- """ Download a video if it is a url, otherwise refer to the file. Secondly index the video
87
- using faiss indices and return thi index. """
88
- if not is_file:
89
- filename = download_video_from_url(url)
90
- else:
91
- filename = url
92
- if os.path.exists(f'{filename}.index'):
93
- logging.info(f"Loading indexed hashes from {filename}.index")
94
- binary_index = faiss.read_index_binary(f'{filename}.index')
95
- logging.info(f"Index {filename}.index has in total {binary_index.ntotal} frames")
96
- return binary_index
97
-
98
- download_video_from_url(url)
99
-
100
- hash_vectors = np.array([x['hash'] for x in compute_hashes(VideoFileClip(filename))])
101
- logging.info(f"Computed hashes for {hash_vectors.shape} frames.")
102
-
103
- # Initializing the quantizer.
104
- quantizer = faiss.IndexBinaryFlat(hash_vectors.shape[1]*8)
105
- # Initializing index.
106
- index = faiss.IndexBinaryIVF(quantizer, hash_vectors.shape[1]*8, min(16, hash_vectors.shape[0]))
107
- index.nprobe = 1 # Number of nearest clusters to be searched per query.
108
- # Training the quantizer.
109
- index.train(hash_vectors)
110
- #index = faiss.IndexBinaryFlat(64)
111
- index.add(hash_vectors)
112
- faiss.write_index_binary(index, f'{filename}.index')
113
- logging.info(f"Indexed hashes for {index.ntotal} frames to {filename}.index.")
114
- return index
115
-
116
- def get_video_indices(url, target, MIN_DISTANCE = 4):
117
- """" The comparison between the target and the original video will be plotted based
118
- on the matches between the target and the original video over time. The matches are determined
119
- based on the minimum distance between hashes (as computed by faiss-vectors) before they're considered a match.
120
-
121
- args:
122
- - url: url of the source video (short video which you want to be checked)
123
- - target: url of the target video (longer video which is a superset of the source video)
124
- - MIN_DISTANCE: integer representing the minimum distance between hashes on bit-level before its considered a match
125
- """
126
- # TODO: Fix crash if no matches are found
127
- is_file = False
128
- if url.endswith('.mp4'):
129
- is_file = True
130
-
131
- # Url (short video)
132
- video_index = index_hashes_for_video(url, is_file)
133
- video_index.make_direct_map() # Make sure the index is indexable
134
- hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
135
-
136
- # Target video (long video)
137
- target_indices = [index_hashes_for_video(x) for x in [target]]
138
-
139
- return video_index, hash_vectors, target_indices
140
-
141
- def compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = 3): # , is_file = False):
142
- """ Search for matches between the indices of the target video (long video)
143
- and the given hash vectors of a video"""
144
- # The results are returned as a triplet of 1D arrays
145
- # lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
146
- # (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
147
- lims, D, I = target_indices[0].range_search(hash_vectors, MIN_DISTANCE)
148
- return lims, D, I, hash_vectors
149
-
150
- def get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE):
151
- """ To get a decent heurstic for a base distance check every distance from MIN_DISTANCE to MAX_DISTANCE
152
- until the number of matches found is equal to or higher than the number of frames in the source video"""
153
- for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
154
- distance = int(distance)
155
- video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
156
- lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = distance)
157
- nr_source_frames = video_index.ntotal
158
- nr_matches = len(D)
159
- logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
160
- if nr_matches >= nr_source_frames:
161
- return distance
162
- logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
163
- return None
164
 
165
  def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
166
  sns.set_theme()
@@ -193,9 +50,6 @@ def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
193
  plt.subplots_adjust(bottom=0.25, left=0.20)
194
  return fig
195
 
196
- logging.basicConfig()
197
- logging.getLogger().setLevel(logging.INFO)
198
-
199
  def plot_multi_comparison(df, change_points):
200
  """ From the dataframe plot the current set of plots, where the bottom right is most indicative """
201
  fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True)
@@ -218,7 +72,7 @@ def plot_multi_comparison(df, change_points):
218
  def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
219
  distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
220
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
221
- lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = distance)
222
 
223
  target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
224
  target_s = [i/FPS for j in target for i in j]
@@ -272,26 +126,10 @@ def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
272
  df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
273
  return df
274
 
275
- def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
276
- tsd = TimeSeriesData(df.loc[:,['time','OFFSET_LIP']])
277
- if method.upper() == "CUSUM":
278
- detector = CUSUMDetector(tsd)
279
- elif method.upper() == "ROBUST":
280
- detector = RobustStatDetector(tsd)
281
- change_points = detector.detector(smoothing_window_size=smoothing_window_size, comparison_window=-2)
282
-
283
- # Print some stats
284
- if method.upper() == "CUSUM" and change_points != []:
285
- mean_offset_prechange = change_points[0].mu0
286
- mean_offset_postchange = change_points[0].mu1
287
- jump_s = mean_offset_postchange - mean_offset_prechange
288
- print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
289
- return change_points
290
-
291
  def get_comparison(url, target, MIN_DISTANCE = 4):
292
  """ Function for Gradio to combine all helper functions"""
293
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
294
- lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = MIN_DISTANCE)
295
  fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
296
  return fig
297
 
@@ -301,7 +139,7 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
301
  if distance == None:
302
  raise gr.Error("No matches found!")
303
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
304
- lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = distance)
305
  # fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
306
  df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
307
  change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
@@ -337,8 +175,5 @@ if __name__ == "__main__":
337
  import matplotlib
338
  matplotlib.use('SVG') # To be able to plot in gradio
339
 
340
- logging.basicConfig()
341
- logging.getLogger().setLevel(logging.INFO)
342
-
343
  iface.launch(inbrowser=True, debug=True)
344
  #iface.launch(auth=("test", "test"), share=True, debug=True)
 
 
 
1
  import logging
 
 
 
2
  import time
3
 
4
  import pandas
5
  import gradio as gr
 
6
 
7
  import seaborn as sns
8
  import matplotlib.pyplot as plt
9
 
 
 
 
10
  import numpy as np
11
  import pandas as pd
12
+
13
+ from config import *
14
+ from videomatch import index_hashes_for_video, get_decent_distance, \
15
+ get_video_indices, compare_videos, get_change_points
16
+
17
+
18
+ logging.basicConfig()
19
+ logging.getLogger().setLevel(logging.INFO)
20
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
23
  sns.set_theme()
 
50
  plt.subplots_adjust(bottom=0.25, left=0.20)
51
  return fig
52
 
 
 
 
53
  def plot_multi_comparison(df, change_points):
54
  """ From the dataframe plot the current set of plots, where the bottom right is most indicative """
55
  fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True)
 
72
  def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
73
  distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
74
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
75
+ lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = distance)
76
 
77
  target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
78
  target_s = [i/FPS for j in target for i in j]
 
126
  df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
127
  return df
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  def get_comparison(url, target, MIN_DISTANCE = 4):
130
  """ Function for Gradio to combine all helper functions"""
131
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
132
+ lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = MIN_DISTANCE)
133
  fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
134
  return fig
135
 
 
139
  if distance == None:
140
  raise gr.Error("No matches found!")
141
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
142
+ lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = distance)
143
  # fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
144
  df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
145
  change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
 
175
  import matplotlib
176
  matplotlib.use('SVG') # To be able to plot in gradio
177
 
 
 
 
178
  iface.launch(inbrowser=True, debug=True)
179
  #iface.launch(auth=("test", "test"), share=True, debug=True)
config.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+
3
+ VIDEO_DIRECTORY = tempfile.gettempdir()
4
+
5
+ FPS = 5
6
+ MIN_DISTANCE = 4
7
+ MAX_DISTANCE = 30
videohash.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request
3
+ import logging
4
+ import hashlib
5
+
6
+ from PIL import Image
7
+ import imagehash
8
+ from moviepy.editor import VideoFileClip
9
+ import numpy as np
10
+
11
+ from config import FPS, VIDEO_DIRECTORY
12
+
13
+
14
+ def filepath_from_url(url):
15
+ """Return filepath based on a md5 hash of a url."""
16
+ return os.path.join(VIDEO_DIRECTORY, hashlib.md5(url.encode()).hexdigest())
17
+
18
+ def download_video_from_url(url):
19
+ """Download the video from url unless it was already downloaded, and return its local filepath."""
20
+ filepath = filepath_from_url(url)
21
+ if not os.path.exists(filepath):
22
+ with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
23
+ fileout.write(f.read())
24
+ logging.info(f"Downloaded video from {url} to {filepath}.")
25
+ else:
26
+ logging.info(f"Skipping downloading from {url} because {filepath} already exists.")
27
+ return filepath
28
+
29
+ def change_ffmpeg_fps(clip, fps=FPS):
30
+ # Hacking the ffmpeg call based on
31
+ # https://github.com/Zulko/moviepy/blob/master/moviepy/video/io/ffmpeg_reader.py#L126
32
+ import subprocess as sp
33
+
34
+ cmd = [arg + ",fps=%d" % fps if arg.startswith("scale=") else arg for arg in clip.reader.proc.args]
35
+ clip.reader.close()
36
+ clip.reader.proc = sp.Popen(cmd, bufsize=clip.reader.bufsize,
37
+ stdout=sp.PIPE, stderr=sp.PIPE, stdin=sp.DEVNULL)
38
+ clip.fps = clip.reader.fps = fps
39
+ clip.reader.lastread = clip.reader.read_frame()
40
+ return clip
41
+
42
+ def compute_hash(frame, hash_size=16):
43
+ image = Image.fromarray(np.array(frame))
44
+ return imagehash.phash(image, hash_size)
45
+
46
+ def binary_array_to_uint8s(arr):
47
+ bit_string = ''.join(str(1 * x) for l in arr for x in l)
48
+ return [int(bit_string[i:i+8], 2) for i in range(0, len(bit_string), 8)]
49
+
50
+ def compute_hashes(url: str, fps=FPS):
51
+ clip = VideoFileClip(download_video_from_url(url))
52
+ for index, frame in enumerate(change_ffmpeg_fps(clip, fps).iter_frames()):
53
+ # Each frame is a triplet of size (height, width, 3) of the video since it is RGB
54
+ # The hash itself is of size (hash_size, hash_size)
55
+ # The uint8 version of the hash is of size (hash_size * hash_size // 8,) since 8 hash bits are packed into each uint8
56
+ hashed = np.array(binary_array_to_uint8s(compute_hash(frame).hash), dtype='uint8')
57
+ yield {"frame": 1+index*fps, "hash": hashed}
videomatch.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+
4
+ import faiss
5
+
6
+ from kats.detectors.cusum_detection import CUSUMDetector
7
+ from kats.detectors.robust_stat_detection import RobustStatDetector
8
+ from kats.consts import TimeSeriesData
9
+
10
+ import numpy as np
11
+
12
+ from videohash import compute_hashes, filepath_from_url
13
+
14
+ def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
15
+ """ Compute hashes of a video and index the video using faiss indices and return the index. """
16
+ filepath = filepath_from_url(url)
17
+ if os.path.exists(f'{filepath}.index'):
18
+ logging.info(f"Loading indexed hashes from {filepath}.index")
19
+ binary_index = faiss.read_index_binary(f'{filepath}.index')
20
+ logging.info(f"Index {filepath}.index has in total {binary_index.ntotal} frames")
21
+ return binary_index
22
+
23
+ hash_vectors = np.array([x['hash'] for x in compute_hashes(url)])
24
+ logging.info(f"Computed hashes for {hash_vectors.shape} frames.")
25
+
26
+ # Initializing the quantizer.
27
+ quantizer = faiss.IndexBinaryFlat(hash_vectors.shape[1]*8)
28
+ # Initializing index.
29
+ index = faiss.IndexBinaryIVF(quantizer, hash_vectors.shape[1]*8, min(16, hash_vectors.shape[0]))
30
+ index.nprobe = 1 # Number of nearest clusters to be searched per query.
31
+ # Training the quantizer.
32
+ index.train(hash_vectors)
33
+ #index = faiss.IndexBinaryFlat(64)
34
+ index.add(hash_vectors)
35
+ faiss.write_index_binary(index, f'{filepath}.index')
36
+ logging.info(f"Indexed hashes for {index.ntotal} frames to {filepath}.index.")
37
+ return index
38
+
39
+ def get_video_indices(filepath: str, target: str, MIN_DISTANCE: int = 4):
40
+ """ The comparison between the target and the original video will be plotted based
41
+ on the matches between the target and the original video over time. The matches are determined
42
+ based on the minimum distance between hashes (as computed by faiss-vectors) before they're considered a match.
43
+
44
+ args:
45
+ - filepath: url of the source video (short video which you want to be checked)
46
+ - target: url of the target video (longer video which is a superset of the source video)
47
+ - MIN_DISTANCE: integer representing the minimum distance between hashes on bit-level before it's considered a match
48
+ """
49
+ # TODO: Fix crash if no matches are found
50
+
51
+ # Url (short video)
52
+ video_index = index_hashes_for_video(filepath)
53
+ video_index.make_direct_map() # Make sure the index is indexable
54
+ hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
55
+
56
+ # Target video (long video)
57
+ target_indices = [index_hashes_for_video(x) for x in [target]]
58
+
59
+ return video_index, hash_vectors, target_indices
60
+
61
+ def compare_videos(hash_vectors, target_indices, MIN_DISTANCE = 3):
62
+ """ Search for matches between the indices of the target video (long video)
63
+ and the given hash vectors of a video"""
64
+ # The results are returned as a triplet of 1D arrays
65
+ # lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
66
+ # (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
67
+ for index in target_indices:
68
+ lims, D, I = index.range_search(hash_vectors, MIN_DISTANCE)
69
+ return lims, D, I, hash_vectors
70
+
71
+ def get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE):
72
+ """ To get a decent heuristic for a base distance check every distance from MIN_DISTANCE to MAX_DISTANCE
73
+ until the number of matches found is equal to or higher than the number of frames in the source video"""
74
+ for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
75
+ distance = int(distance)
76
+ video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
77
+ lims, D, I, hash_vectors = compare_videos(hash_vectors, target_indices, MIN_DISTANCE = distance)
78
+ nr_source_frames = video_index.ntotal
79
+ nr_matches = len(D)
80
+ logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
81
+ if nr_matches >= nr_source_frames:
82
+ return distance
83
+ logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
84
+ return None
85
+
86
+ def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
87
+ tsd = TimeSeriesData(df.loc[:,['time','OFFSET_LIP']])
88
+ if method.upper() == "CUSUM":
89
+ detector = CUSUMDetector(tsd)
90
+ elif method.upper() == "ROBUST":
91
+ detector = RobustStatDetector(tsd)
92
+ change_points = detector.detector(smoothing_window_size=smoothing_window_size, comparison_window=-2)
93
+
94
+ # Print some stats
95
+ if method.upper() == "CUSUM" and change_points != []:
96
+ mean_offset_prechange = change_points[0].mu0
97
+ mean_offset_postchange = change_points[0].mu1
98
+ jump_s = mean_offset_postchange - mean_offset_prechange
99
+ print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
100
+ return change_points