whisper-youtube-2-hf_dataset / storing /sqlitebatchvideostorer.py
juancopi81's picture
Duplicate from Whispering-GPT/whisper-youtube-2-hf_dataset
history blame
1.11 kB
import json
import sqlite3
from typing import List, Tuple
from utils import accepts_types
from video import YoutubeVideo
class SQLiteBatchVideoStorer:
    """Insert batches of video entries into a SQLite table.

    The target table must expose the columns named in the INSERT statement:
    ``channel_name``, ``url``, ``title``, ``description``, ``transcription``
    and ``segments``.
    """

    def __init__(self, table: str = "video"):
        """Remember the name of the table that rows are inserted into.

        NOTE: ``table`` is interpolated directly into the SQL text (SQL
        identifiers cannot be bound as ``?`` parameters), so it must come
        from trusted code, never from user input.
        """
        self.table = table

    @accepts_types(sqlite3.Cursor, list)
    def store(self,
              db_cursor: sqlite3.Cursor,
              videos: List[YoutubeVideo]) -> None:
        """Batch insert ``videos`` into ``self.table`` through ``db_cursor``.

        Row values are bound via ``?`` placeholders. No commit is issued
        here; that is left to whoever owns the connection.
        """
        video_list = self._convert_videos_to_list(videos)
        db_cursor.executemany(
            f"INSERT INTO {self.table}(channel_name, url, title, description, transcription, segments) VALUES(?, ?, ?, ?, ?, ?)",
            video_list,
        )

    @staticmethod
    def _convert_videos_to_list(videos: List[YoutubeVideo]) -> List[Tuple[str, str, str, str, str, str]]:
        """Return one row tuple per video, with ``segments`` JSON-encoded.

        ``segments`` is serialized with ``json.dumps`` so it can be stored in
        a single column. The serialized value is only put on the video while
        ``to_tuple()`` runs; the original value is restored afterwards so the
        caller's objects are left unchanged and storing the same videos twice
        does not double-encode the segments.
        """
        rows = []
        for video in videos:
            raw_segments = video.segments
            # TODO: Find better way to solve this
            video.segments = json.dumps(raw_segments)
            try:
                rows.append(video.to_tuple())
            finally:
                # Undo the temporary swap even if to_tuple() raises.
                video.segments = raw_segments
        return rows