|
import json
import re
from typing import Optional

import yt_dlp
|
|
|
def getVttUrlFromSubtitles(item, lang='en', vttType="vtt"):
    """Pick the URL of a subtitle track in the requested language and format.

    Args:
        item: Mapping of language code to a list of track dicts; each track
            dict is expected to carry an ``"ext"`` and a ``"url"`` entry.
        lang: Preferred language code. When it has no tracks, ``'en'`` is
            tried next, and after that the first language in the mapping.
        vttType: Track extension to look for (compared against ``"ext"``).

    Returns:
        The ``"url"`` of the first track whose ``"ext"`` equals ``vttType``
        in the selected language, or ``None`` when the mapping is empty or
        no track matches.
    """
    if not item:
        return None

    # Language preference: requested -> English -> whatever comes first.
    if item.get(lang):
        key = lang
    elif item.get('en'):
        key = 'en'
    else:
        key = next(iter(item))

    for track in item.get(key) or []:
        if track.get("ext") == vttType:
            return track.get("url")
    return None
|
|
|
def getSubtitle(url: str, lang: Optional[str] = 'en', vttType="vtt") -> Optional[str]:
    """Fetch the text of a subtitle track for a video without downloading it.

    Manually provided subtitles (``info["subtitles"]``) are tried first and
    automatic captions (``info["automatic_captions"]``) second. The track URL
    is chosen by ``getVttUrlFromSubtitles``.

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.
        lang: Preferred subtitle language; ``None`` requests no specific
            language in the yt-dlp options.
        vttType: Track extension to fetch (e.g. ``"vtt"``).

    Returns:
        The decoded subtitle payload, or ``None`` when neither source offers
        a matching track.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None

        for source in ("subtitles", "automatic_captions"):
            tracks = info_dict.get(source)
            if not tracks:
                continue

            subtitle_url = getVttUrlFromSubtitles(tracks, lang, vttType)
            if not subtitle_url:
                # No track in the wanted format here (e.g. only a live-chat
                # entry); try the next source instead of opening a None URL.
                continue

            with ydl.urlopen(subtitle_url) as subtitle:
                return subtitle.read().decode()

    return None
|
|
|
def fetchSubtitleUrls(url: str) -> Optional[dict]:
    """Return the subtitle-track mapping yt-dlp reports for a video.

    The manually provided ``"subtitles"`` mapping is preferred unless it is
    empty or its only entry is ``"live_chat"``; in that case the
    ``"automatic_captions"`` mapping is returned instead.

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.

    Returns:
        The selected mapping taken from the info dict, or ``None`` when
        neither mapping has any entries.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)

        manual = info_dict.get("subtitles")
        # A lone "live_chat" entry is not treated as a usable subtitle set.
        if manual and set(manual) != {"live_chat"}:
            return manual

        return info_dict.get("automatic_captions") or None
|
|
|
def get_subtitle(url, lang='en'):
    """Download a video's subtitle file and return its text without tags.

    yt-dlp is asked to write the subtitle for ``lang`` (the media itself is
    skipped) using the output template ``%(id)s.%(ext)s``. The file
    ``<video_id>.<lang>.vtt`` is then read from the current working directory
    and everything matching ``<...>`` is removed (e.g. inline cue tags).

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.
        lang: Subtitle language code; ``None`` is treated as ``'en'``.

    Returns:
        The cleaned subtitle text, or ``None`` when the video id is missing
        or any step fails (the error is printed).
    """
    if lang is None:
        lang = 'en'

    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        'skip_download': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)

        video_id = info_dict.get("id", None)
        if video_id is None:
            return None

        subtitle_file = f"{video_id}.{lang}.vtt"
        # WebVTT is UTF-8 by specification; don't rely on the locale default.
        with open(subtitle_file, 'r', encoding='utf-8') as f:
            subtitle_content = f.read()

        return re.sub(r"<[^>]+>", "", subtitle_content)
    except Exception as error:
        # Best-effort API: report the failure and signal it with None.
        print(error)
        return None