File size: 2,525 Bytes
7288748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from pathlib import Path
import pytest
import os

from youtube_transcriber.preprocessing.youtubevideopreprocessor import YoutubeVideoPreprocessor
from youtube_transcriber.loading.serialization import JsonSerializer

@pytest.fixture
def youtube_video_preprocessor():
    """Run the preprocessor once and expose its result to the requesting test.

    A ``YoutubeVideoPreprocessor`` is created in ``"channel_name"`` mode with a
    ``JsonSerializer`` and ``preprocess`` is invoked for the
    "Best Shorts Quotes" channel.

    Returns:
        The ``(load_paths, dataset_folder)`` pair unpacked from the
        ``preprocess`` call.
    """
    preprocessor = YoutubeVideoPreprocessor(
        mode="channel_name",
        serializer=JsonSerializer(),
    )
    load_paths, dataset_folder = preprocessor.preprocess(
        name="Best Shorts Quotes",
        num_videos=2,
        videos_in_ds=["GU2_xlNCJrA"],
    )
    return load_paths, dataset_folder

@pytest.fixture
def expected_file_paths():
    """Return the two JSON file paths the tests expect to find on disk.

    Both paths live under
    ``~/whisper_gpt_pipeline/youtube_transcriber/Best Shorts Quotes``.
    """
    dataset_dir = Path.home().joinpath(
        "whisper_gpt_pipeline/youtube_transcriber",
        "Best Shorts Quotes",
    )
    return [dataset_dir / file_name for file_name in ("0.json", "1.json")]

@pytest.fixture
def expected_folder_path():
    """Return the dataset folder path the tests expect.

    The folder is
    ``~/whisper_gpt_pipeline/youtube_transcriber/Best Shorts Quotes``.
    """
    return Path.home().joinpath(
        "whisper_gpt_pipeline/youtube_transcriber",
        "Best Shorts Quotes",
    )

def test_youtube_video_preprocessor_init():
    """Check that the constructor stores the mode and serializer it was given."""
    yt_video_preprocessor = YoutubeVideoPreprocessor(
        mode="channel_name",
        serializer=JsonSerializer(),
    )
    # isinstance is the idiomatic type check (pycodestyle E721 flags
    # ``type(x) == T`` comparisons).
    assert isinstance(yt_video_preprocessor, YoutubeVideoPreprocessor)
    assert isinstance(yt_video_preprocessor.serializer, JsonSerializer)
    assert yt_video_preprocessor.mode == "channel_name"
    
def test_created_file(youtube_video_preprocessor, expected_file_paths):
    """Check that preprocessing left the expected files on disk.

    Args:
        youtube_video_preprocessor: ``(load_paths, dataset_folder)`` pair
            supplied by the fixture of the same name.
        expected_file_paths: Paths that must exist once preprocessing has run.
    """
    paths, _ = youtube_video_preprocessor

    # Checked unconditionally: previously these assertions sat inside a loop
    # over ``paths`` and were skipped entirely when ``paths`` was empty.
    for expected_path in expected_file_paths:
        assert os.path.exists(expected_path), (
            f"expected file was not created: {expected_path}"
        )

    # Every path reported by the preprocessor must point at a real file too.
    for path in paths:
        assert os.path.exists(path), f"reported path does not exist: {path}"

def test_created_folder(youtube_video_preprocessor, expected_folder_path):
    """Check that the dataset folder returned by preprocessing is the expected one."""
    _load_paths, actual_folder = youtube_video_preprocessor
    assert actual_folder == expected_folder_path

def test_loop_through_created_files(youtube_video_preprocessor):
    """Check that each created file deserializes to the expected record.

    Args:
        youtube_video_preprocessor: ``(load_paths, dataset_folder)`` pair
            supplied by the fixture of the same name.
    """
    expected_data = [
        {
            "channel_name": "Best Shorts Quotes",
            "url": "https://www.youtube.com/watch?v=GU2_xlNCJrA",
        },
        {
            "channel_name": "Best Shorts Quotes",
            "url": "https://www.youtube.com/watch?v=ttRI4EmmxkY",
        },
    ]

    paths, _ = youtube_video_preprocessor
    serializer = JsonSerializer()
    loaded_data = [serializer.load(path) for path in paths]

    # Comparing the whole lists also catches a wrong number of files: too few
    # used to leave records unchecked (vacuously passing when there were none)
    # and too many raised an opaque IndexError.
    assert loaded_data == expected_data