File size: 1,239 Bytes
ad4bd11
 
 
 
 
 
727e567
 
 
ad4bd11
 
 
 
 
 
 
 
727e567
ad4bd11
 
 
 
 
 
727e567
ad4bd11
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import os
import json
import shutil

from videohash import filepath_from_url

# < Algemene Politieke Beschouwing 2022 >
# Load the reference videos to compare against from a .json file.
# This can be swapped for any other .json file with the same layout: the
# loop below expects a sequence of objects that each have a 'url' and an
# 'mp4' key.
# NOTE: the path is relative, so it is resolved against the current working
# directory rather than the location of this module.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('apb2022.json', encoding='utf-8') as filein:
    # urls / videos keep the order of the file; the two dicts map every
    # item's 'url' to its 'mp4' value and back (last entry wins on duplicates).
    urls, videos, url2video, video2url = [], [], {}, {}
    for item in json.load(filein):
        urls.append(item['url'])
        videos.append(item['mp4'])
        url2video[item['url']] = item['mp4']
        video2url[item['mp4']] = item['url']

# For every video in the dataset, work out where its '.index' file is
# expected (filepath) and where a copy may be kept in the data folder
# (datapath). When the expected file is missing but the data folder has one,
# copy it into place.
for url in videos:
    filepath = filepath_from_url(url) + '.index'
    datapath = os.path.join('data', os.path.basename(filepath))
    # Nothing to do if the index is already there, or if there is no stored
    # copy to restore it from.
    if os.path.exists(filepath) or not os.path.exists(datapath):
        continue
    shutil.copyfile(datapath, filepath)

# Manually build the indices for the dataset loaded in this file and store a
# copy of each one in the data folder. Runs only when this file is executed
# as a script, not when it is imported.
if __name__ == "__main__":
    # Imported here, so importing this module does not import videomatch.
    from videomatch import get_video_index

    # shutil.copyfile does not create missing directories; make sure the
    # target folder exists so the copy cannot fail after the index was built.
    os.makedirs('data', exist_ok=True)

    for url in videos:
        get_video_index(url)
        # NOTE(review): presumably get_video_index leaves the index at this
        # path -- confirm against videomatch.
        filepath = filepath_from_url(url) + '.index'
        datapath = os.path.join('data', os.path.basename(filepath))
        shutil.copyfile(filepath, datapath)