Spaces:
Build error
Build error
First implementation of index and compare APIs.
Browse files
Co-authored-by: iskaj <[email protected]>
Co-authored-by: Prajakta Shouche <[email protected]>
- app.py +120 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tempfile
|
2 |
+
import urllib.request
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
+
import hashlib
|
6 |
+
|
7 |
+
import pandas
|
8 |
+
import gradio as gr
|
9 |
+
from moviepy.editor import VideoFileClip
|
10 |
+
|
11 |
+
import imagehash
|
12 |
+
from PIL import Image
|
13 |
+
|
14 |
+
import numpy as np
|
15 |
+
import matplotlib
|
16 |
+
matplotlib.use('SVG')
|
17 |
+
import matplotlib.pyplot as plt
|
18 |
+
|
19 |
+
import faiss
|
20 |
+
|
21 |
+
# Install the default logging handler, then lower the root logger's threshold
# so DEBUG-level records are emitted.
logging.basicConfig()
logging.root.setLevel(logging.DEBUG)


# Directory in which downloaded videos (and the index files written next to
# them) are stored: the platform's temporary directory.
video_directory = tempfile.gettempdir()
|
26 |
+
|
27 |
+
def download_video_from_url(url):
    """Download the video at ``url`` into ``video_directory`` and return its path.

    The local file name is the MD5 hex digest of the URL, so a given URL
    always maps to the same file. If that file already exists the download is
    skipped and the existing path is returned.

    The payload is streamed to a temporary file in the same directory and only
    renamed to its final name once the download has completed, so an
    interrupted download can never be mistaken for a finished one.

    Args:
        url: Location of the video to fetch.

    Returns:
        Path of the local copy of the video.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the local file cannot be written.
    """
    import shutil  # Function-scope import: this is the only user in the file.

    # NOTE(review): ``url`` comes from a free-text input and ``urlopen`` also
    # accepts non-HTTP schemes such as ``file://`` -- consider restricting the
    # allowed schemes if this is exposed publicly.
    filename = os.path.join(video_directory, hashlib.md5(url.encode()).hexdigest())
    if not os.path.exists(filename):
        fd, partial = tempfile.mkstemp(dir=video_directory, suffix='.part')
        try:
            with os.fdopen(fd, 'wb') as fileout, urllib.request.urlopen(url) as f:
                # Copy in chunks instead of holding the whole video in memory.
                shutil.copyfileobj(f, fileout)
            os.replace(partial, filename)
        finally:
            # After a successful rename the temporary name no longer exists;
            # after a failure this removes the incomplete download.
            if os.path.exists(partial):
                os.remove(partial)
        logging.info(f"Downloaded video from {url} to {filename}.")
    return filename
|
35 |
+
|
36 |
+
def change_ffmpeg_fps(clip, fps=5):
    """Relaunch the clip's ffmpeg reader process with an ``fps`` filter added.

    This is a hack on top of moviepy's ffmpeg reader, based on
    https://github.com/Zulko/moviepy/blob/master/moviepy/video/io/ffmpeg_reader.py#L126
    The reader's existing command line is reused, with ``,fps=<fps>`` appended
    to every argument that starts with ``scale=``.

    Args:
        clip: Clip whose ``reader`` is to be restarted (modified in place).
        fps: Frame rate to request from ffmpeg.

    Returns:
        The same ``clip`` object, with ``clip.fps`` and ``clip.reader.fps``
        set to ``fps``.
    """
    import subprocess

    reader = clip.reader

    # The command line is captured before the reader is closed.
    # NOTE(review): presumably ``close()`` discards ``reader.proc`` -- confirm
    # against the installed moviepy version before reordering anything here.
    command = []
    for argument in reader.proc.args:
        if argument.startswith("scale="):
            argument = argument + ",fps=%d" % fps
        command.append(argument)

    reader.close()
    reader.proc = subprocess.Popen(
        command,
        bufsize=reader.bufsize,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.DEVNULL,
    )
    clip.fps = reader.fps = fps
    # Prime the reader with the first frame produced by the new process.
    reader.lastread = reader.read_frame()
    return clip
|
48 |
+
|
49 |
+
def compute_hash(frame, hash_size=16):
    """Return the perceptual hash (``imagehash.phash``) of one video frame.

    Args:
        frame: Frame pixel data; it is converted with ``np.array`` and then
            wrapped in a PIL image.
        hash_size: Passed through to ``imagehash.phash``.

    Returns:
        The hash object produced by ``imagehash.phash``.
    """
    pixels = np.array(frame)
    picture = Image.fromarray(pixels)
    return imagehash.phash(picture, hash_size)
|
52 |
+
|
53 |
+
def binary_array_to_uint8s(arr):
    """Pack a 2-D array of bits into a list of byte values (0-255).

    The bits are read row by row, most significant bit first, and grouped
    eight at a time. If the total number of bits is not a multiple of eight,
    the final value is built from the remaining bits only (i.e. it is
    right-aligned, not zero-padded on the right).

    Args:
        arr: Rectangular 2-D sequence or array of booleans / 0-1 integers.

    Returns:
        A list of Python ints, one per group of up to eight bits. An empty
        input yields an empty list.
    """
    bits = np.asarray(arr, dtype=bool).ravel()
    whole = bits.size - bits.size % 8

    # Vectorised packing replaces the per-element string round trip.
    packed = np.packbits(bits[:whole]).tolist()

    leftover = bits.size - whole
    if leftover:
        # np.packbits zero-pads on the right; shift back so the trailing
        # partial group keeps the value int(<remaining bits>, 2).
        packed.append(int(np.packbits(bits[whole:])[0]) >> (8 - leftover))
    return packed
|
56 |
+
|
57 |
+
def compute_hashes(clip, fps=5):
    """Yield one hash record per frame of ``clip`` read at ``fps`` frames/second.

    The clip's reader is first restarted through ``change_ffmpeg_fps``; each
    frame it then produces is perceptually hashed and the hash bits are packed
    into a ``uint8`` vector.

    Args:
        clip: Clip to read frames from (its reader is modified in place).
        fps: Frame rate handed to ``change_ffmpeg_fps``.

    Yields:
        Dicts with keys ``"frame"`` (``1 + index * fps``, where ``index`` is
        the 0-based position of the frame in the resampled stream) and
        ``"hash"`` (1-D ``uint8`` numpy array).
    """
    # NOTE(review): whether "frame" is meant to be a frame number in the
    # source video cannot be told from this file -- confirm the intent.
    resampled = change_ffmpeg_fps(clip, fps)
    for position, frame in enumerate(resampled.iter_frames()):
        bit_matrix = compute_hash(frame).hash
        packed = binary_array_to_uint8s(bit_matrix)
        yield {"frame": 1 + position * fps, "hash": np.array(packed, dtype='uint8')}
|
61 |
+
|
62 |
+
def index_hashes_for_video(url):
    """Return a faiss binary index over the frame hashes of the video at ``url``.

    The video is downloaded (or taken from the local cache). If an index file
    ``<video path>.index`` already exists it is loaded and returned; otherwise
    the frames are hashed, a binary IVF index is trained and filled with the
    hashes, written to that file, and returned.

    Args:
        url: Location of the video.

    Returns:
        The faiss binary index holding one hash vector per sampled frame.

    Raises:
        ValueError: If no frame of the video could be hashed.
    """
    filename = download_video_from_url(url)
    index_path = f'{filename}.index'
    if os.path.exists(index_path):
        return faiss.read_index_binary(index_path)

    # Close the clip once hashing is done so its reader is released instead
    # of lingering until garbage collection.
    with VideoFileClip(filename) as clip:
        hash_vectors = np.array([x['hash'] for x in compute_hashes(clip)])
    logging.info(f"Computed hashes for {hash_vectors.shape} frames.")

    # With zero frames the array is 1-D and empty; fail with a clear message
    # rather than an IndexError on shape[1] below.
    if hash_vectors.ndim != 2 or hash_vectors.shape[0] == 0:
        raise ValueError(f"No frames could be hashed for {url}.")

    # Each uint8 element carries eight hash bits.
    n_bits = hash_vectors.shape[1] * 8

    # Initializing the quantizer.
    quantizer = faiss.IndexBinaryFlat(n_bits)
    # Initializing index; never ask for more clusters than there are vectors.
    index = faiss.IndexBinaryIVF(quantizer, n_bits, min(16, hash_vectors.shape[0]))
    index.nprobe = 1  # Number of nearest clusters to be searched per query.
    # Training the quantizer.
    index.train(hash_vectors)
    index.add(hash_vectors)
    faiss.write_index_binary(index, index_path)
    logging.info(f"Indexed hashes for {index.ntotal} frames to {filename}.index.")
    return index
|
82 |
+
|
83 |
+
def compare_videos(url, target):
    """Plot, per frame hash of ``url``, the distance to its closest match in ``target``.

    Both videos are indexed (or their cached indexes loaded). Every hash
    stored for ``url`` is then used as a query in a range search (radius 20)
    against the index of ``target``.

    Args:
        url: Location of the video whose frames are used as queries.
        target: Location of the video that is searched.

    Returns:
        A matplotlib figure with one point per query hash: the smallest
        distance found within the search radius, or NaN (a gap in the line)
        when the query had no match within the radius.
    """
    video_index = index_hashes_for_video(url)
    target_index = index_hashes_for_video(target)

    # A direct map is needed before vectors can be reconstructed by id.
    video_index.make_direct_map()
    hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)])

    # The results are returned as a triplet of 1D arrays
    # lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
    # (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
    lims, D, _ = target_index.range_search(hash_vectors, 20)

    # Take the minimum over each query's own slice of D. Reading D[lims[i]]
    # alone would pick an arbitrary hit and, for a query without hits, would
    # read the next query's distance (or run past the end of D).
    min_distance = []
    for start, stop in zip(lims[:-1], lims[1:]):
        start, stop = int(start), int(stop)
        min_distance.append(D[start:stop].min() if stop > start else np.nan)

    import matplotlib.pyplot as plt

    fig = plt.figure()
    plt.plot(min_distance)
    return fig
|
102 |
+
|
103 |
+
# Example videos offered in the UI. The last entry doubles as the comparison
# target for all the others in the "Compare" tab.
video_urls = [
    "https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
    "https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
    "https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
    "https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1",
]

# "Index" tab: takes a video URL and shows how many frame hashes its index holds.
index_iface = gr.Interface(
    fn=lambda url: index_hashes_for_video(url).ntotal,
    inputs="text",
    outputs="text",
    examples=video_urls,
    cache_examples=True,
)

# "Compare" tab: takes two video URLs and shows the distance plot.
compare_iface = gr.Interface(
    fn=compare_videos,
    inputs=["text", "text"],
    outputs="plot",
    examples=[[candidate, video_urls[-1]] for candidate in video_urls[:-1]],
)

iface = gr.TabbedInterface([index_iface, compare_iface], ["Index", "Compare"])

if __name__ == "__main__":
    iface.launch()
    # Alternative launch used while debugging:
    # iface.launch(auth=("test", "test"), share=True, debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==3.1.7
|
2 |
+
moviepy==1.0.3
|
3 |
+
imagehash==4.2.1
|
4 |
+
pandas==1.4.3
|
5 |
+
faiss-cpu==1.7.2
|
6 |
+
Pillow==9.2.0
|