Commit 879e657 • Parent(s): 9061a2e
Iskaj committed

minor changes to code style and comments
Files changed:
- app.py +12 -29
- plot.py +19 -26
- videohash.py +1 -0
- videomatch.py +2 -3
app.py
CHANGED
@@ -19,7 +19,7 @@ def get_comparison(url, target, MIN_DISTANCE = 4):
     fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
     return fig
 
-def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
+def get_auto_comparison(url, target, smoothing_window_size=10, metric="OFFSET_LIP"):
     """ Function for Gradio to combine all helper functions"""
     distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
     if distance == None:
@@ -27,34 +27,14 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
         raise gr.Error("No matches found!")
     video_index, hash_vectors = get_video_index(url)
     target_index, _ = get_video_index(target)
-    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
-    # fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
-    df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
-    change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
-    fig, segment_decisions = plot_segment_comparison(df, change_points)
-    return fig, segment_decisions
-
-def get_auto_edit_decision(url, target, smoothing_window_size=10):
-    """ Function for Gradio to combine all helper functions"""
-    distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
-    if distance == None:
-        return None
-        raise gr.Error("No matches found!")
-    video_index, hash_vectors = get_video_index(url)
-    target_index, _ = get_video_index(target)
-    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
-
-    df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
-    change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST")
-    edit_decision_list = []
-    for cp in change_points:
-        decision = f"Video at time {cp.start_time} returns {cp.metric}"
-        # edit_decision_list.append(f"Video at time {cp.start_time} returns {cp.metric}")
-
-
-    fig = plot_multi_comparison(df, change_points)
-    return fig
 
+    # For each video do...
+    for i in range(0, 1):
+        lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
+        df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
+        change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST", metric=metric)
+        fig, segment_decisions = plot_segment_comparison(df, change_points, target)
+        return fig, segment_decisions
 
 
 video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
@@ -74,7 +54,10 @@ compare_iface = gr.Interface(fn=get_comparison,
                              examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
 
 auto_compare_iface = gr.Interface(fn=get_auto_comparison,
-                                  inputs=["text",
+                                  inputs=["text",
+                                          "text",
+                                          gr.Slider(2, 50, 10, step=1),
+                                          gr.Dropdown(choices=["OFFSET_LIP", "ROLL_OFFSET_MODE"], value="OFFSET_LIP")],
                                   outputs=["plot", "json"],
                                   examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
 
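The updated get_auto_comparison now takes a metric choice and is exposed through four Gradio inputs (two URLs, a smoothing-window slider, and a metric dropdown). Below is a minimal sketch of how the two interfaces defined in app.py could be launched together; the TabbedInterface wiring and the tab titles are assumptions, not part of this commit.

import gradio as gr

# Hypothetical launch block: app.py's actual launch code is not shown in this diff.
demo = gr.TabbedInterface(
    [compare_iface, auto_compare_iface],       # interfaces defined above in app.py
    ["Manual comparison", "Auto comparison"],  # assumed tab titles
)

if __name__ == "__main__":
    demo.launch()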
plot.py
CHANGED
@@ -69,29 +69,25 @@ def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
         return datetime64 - np.timedelta64(int(s), 's') - np.timedelta64(int(m * 1000), 'ms')
     return datetime64 + np.timedelta64(int(s), 's') + np.timedelta64(int(m * 1000), 'ms')
 
-def plot_segment_comparison(df, change_points):
+def plot_segment_comparison(df, change_points, video_id="Placeholder_Video_ID"):
     """ From the dataframe plot the current set of plots, where the bottom right is most indicative """
     fig, ax_arr = plt.subplots(3, 1, figsize=(16, 6), dpi=100, sharex=True)
-
+
+    # Plot original datapoints without linear interpolation, offset by target video time
     sns.scatterplot(data = df, x='time', y='OFFSET', ax=ax_arr[0], label="OFFSET", alpha=0.5)
-    # sns.lineplot(data = df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0,1])
-
-    # Get rolling average offset
-    # window_size = 30
-    # df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).median()
-    # df['ROLL_OFFSET'] = df['OFFSET_LIP'].rolling(window_size, center=False, min_periods=1).apply(lambda x: st.mode(x)[0])
-    metric = 'ROLL_OFFSET_MODE' #'OFFSET'
-    sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[1], label=metric, alpha=0.5)
 
     # Plot linearly interpolated values
-    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1], label="OFFSET_LIP")
+    sns.lineplot(data = df, x='time', y='OFFSET_LIP', ax=ax_arr[1], label="OFFSET_LIP", color='orange')
 
-    # Plot
-
-
+    # Plot our target metric
+    metric = 'ROLL_OFFSET_MODE' # 'OFFSET'
+    sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[1], label=metric, alpha=0.5)
+
+    # Plot detected change points as lines which will indicate the segments
     sns.scatterplot(data = df, x='time', y=metric, ax=ax_arr[2], label=metric, s=20)
     timestamps = change_points_to_segments(df, change_points)
 
+    # To store "decisions" about segments
     segment_decisions = {}
     seg_i = 0
 
@@ -99,20 +95,16 @@ def plot_segment_comparison(df, change_points):
     for x in timestamps:
         plt.vlines(x=x, ymin=np.min(df[metric]), ymax=np.max(df[metric]), colors='black', lw=2, alpha=0.5)
 
-
-    threshold_diff = 1.5 # Average diff threshold
-    # threshold = 3.0 # s diff threshold
+    threshold_diff = 1.5 # Average segment difference threshold for plotting
     for start_time, end_time in zip(timestamps[:-1], timestamps[1:]):
 
-        #
+        # Time to add to each origin time to get the correct time back since it is offset by add_offset
         add_offset = np.min(df['SOURCE_S'])
 
         # Cut out the segment between the segment lines
         segment = df[(df['time'] > start_time) & (df['time'] < end_time)] # Not offset LIP
         segment_no_nan = segment[~np.isnan(segment[metric])] # Remove NaNs
         segment_offsets = segment_no_nan[metric] # np.round(segment_no_nan['OFFSET'], 1)
-        # segment_offsets = np.round(segment_no_nan['OFFSET'], 0)
-        # print(segment_offsets)
 
         # Calculate mean/median/mode
         # seg_sum_stat = np.mean(segment_offsets)
@@ -120,7 +112,8 @@ def plot_segment_comparison(df, change_points):
         seg_sum_stat = st.mode(segment_offsets)[0][0]
 
         # Get average difference from mean/median/mode of the segment to see if it is a "straight line" or not
-        average_diff = np.median(np.abs(
+        average_diff = np.median(np.abs(segment_no_nan['OFFSET_LIP'] - seg_sum_stat))
+        average_offset = np.mean(segment_no_nan['OFFSET_LIP'])
 
         # If the time where the segment comes from (origin time) is close to the start_time, it's a "good match", so no editing
         noisy = False if average_diff < threshold_diff else True
@@ -142,17 +135,17 @@ def plot_segment_comparison(df, change_points):
         end_time_str = pd.to_datetime(end_time).strftime('%H:%M:%S')
         origin_start_time_str = pd.to_datetime(origin_start_time).strftime('%H:%M:%S')
         origin_end_time_str = pd.to_datetime(origin_end_time).strftime('%H:%M:%S')
-        video_id = "placeholder_video_id"
-
         decision = {"Target Start Time" : start_time_str,
                     "Target End Time" : end_time_str,
                     "Source Start Time" : origin_start_time_str,
                     "Source End Time" : origin_end_time_str,
-                    "Video ID" : video_id,
-                    "
+                    "Source Video ID" : video_id,
+                    "Uncertainty" : np.round(average_diff, 3),
+                    "Average Offset in Seconds" : np.round(average_offset, 3),
+                    "Explanation" : f"{start_time_str} -> {end_time_str} comes from video with ID '{video_id}' from {origin_start_time_str} -> {origin_end_time_str}"}
         segment_decisions[f'Segment {seg_i}'] = decision
         seg_i += 1
-        print(decision)
+        # print(decision)
 
     # Return figure
     plt.xticks(rotation=90)
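To make the new "Uncertainty" and "Average Offset in Seconds" fields concrete, here is a small self-contained sketch of the per-segment statistics computed above. It uses toy offset values instead of a real dataframe segment; the variable names mirror plot.py but the numbers are invented.

import numpy as np
import scipy.stats as st

segment_offsets = np.array([2.0, 2.0, 2.1, 1.9, 2.0, 6.5])  # toy OFFSET_LIP-style values
threshold_diff = 1.5  # same threshold as in plot.py

# Modal offset of the segment; the [0][0] indexing assumes the older scipy
# mode() that returns arrays, as plot.py does.
seg_sum_stat = st.mode(segment_offsets)[0][0]
average_diff = np.median(np.abs(segment_offsets - seg_sum_stat))  # "Uncertainty"
average_offset = np.mean(segment_offsets)                         # "Average Offset in Seconds"

# A segment whose offsets hug the mode counts as a clean match; a large spread marks it as noisy.
noisy = False if average_diff < threshold_diff else True
print(np.round(average_diff, 3), np.round(average_offset, 3), noisy)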
videohash.py
CHANGED
@@ -18,6 +18,7 @@ def filepath_from_url(url):
 
 def download_video_from_url(url):
     """Download video from url or return md5 hash as video name"""
+    # TODO: Make work for Google link
     filepath = filepath_from_url(url)
     if not os.path.exists(filepath):
         with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
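filepath_from_url itself is not part of this hunk; the docstring above suggests the local filename is derived from an md5 hash of the URL. A hedged sketch of what such a helper might look like follows (the function name, hash choice, and temp-dir location are assumptions, not taken from this commit).

import hashlib
import os
import tempfile

def filepath_from_url_sketch(url: str) -> str:
    """Hypothetical stand-in: derive a stable local filepath from a video URL."""
    name = hashlib.md5(url.encode()).hexdigest()  # md5 of the URL as the video name
    return os.path.join(tempfile.gettempdir(), f"{name}.mp4")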
videomatch.py
CHANGED
@@ -79,9 +79,8 @@ def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
         logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
         return None
 
-def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
-    tsd = TimeSeriesData(df.loc[:,['time',
-    # tsd = TimeSeriesData(df.loc[:,['time','ROLL_OFFSET_MODE']])
+def get_change_points(df, smoothing_window_size=10, method='CUSUM', metric="OFFSET_LIP"):
+    tsd = TimeSeriesData(df.loc[:,['time', metric]])
     if method.upper() == "CUSUM":
         detector = CUSUMDetector(tsd)
     elif method.upper() == "ROBUST":
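The hunk stops at the ROBUST branch, so the detector construction itself is not visible here. Below is a hedged sketch of how the rest of get_change_points could plausibly continue with Kats; the RobustStatDetector import and the detector() arguments are assumptions, not taken from this commit.

from kats.consts import TimeSeriesData
from kats.detectors.cusum_detection import CUSUMDetector
from kats.detectors.robust_stat_detection import RobustStatDetector

def get_change_points_sketch(df, smoothing_window_size=10, method='CUSUM', metric="OFFSET_LIP"):
    """Hypothetical continuation of get_change_points, mirroring the signature above."""
    tsd = TimeSeriesData(df.loc[:, ['time', metric]])
    if method.upper() == "CUSUM":
        # CUSUM-based detection with Kats defaults
        return CUSUMDetector(tsd).detector()
    elif method.upper() == "ROBUST":
        # RobustStatDetector smooths the series over a rolling window before testing for level shifts
        return RobustStatDetector(tsd).detector(smoothing_window_size=smoothing_window_size)
    raise ValueError(f"Unknown change point detection method: {method}")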