# ytdlp_subtitle / fetchYoutubeSubtitle.py
# lanbogao's picture
# Update readme.
# ba9fae4
# raw
# history blame
# No virus
# 2.68 kB
import json
import re
from typing import Optional

import yt_dlp
def getVttUrlFromSubtitles(item, lang='en', vttType="vtt"):
    """Pick the URL of a subtitle track in the requested format.

    Args:
        item: Mapping of language code to a list of format dicts; each
            format dict is read for its ``"ext"`` and ``"url"`` keys.
        lang: Preferred language code. When it is missing (or has no
            formats), ``'en'`` is tried next, then the first language in
            the mapping.
        vttType: Value the format's ``"ext"`` entry must equal.

    Returns:
        The ``"url"`` of the first matching format for the chosen
        language, or ``None`` when the mapping is empty or no format of
        the chosen language matches ``vttType``.
    """
    if not item:
        return None

    # Language preference: requested -> English -> whatever comes first.
    if item.get(lang):
        key = lang
    elif item.get('en'):
        key = 'en'
    else:
        key = next(iter(item))

    # `or []` guards against a language whose format list is None/empty.
    for fmt in item[key] or []:
        if fmt.get("ext") == vttType:
            return fmt.get("url")
    return None
def getSubtitle(url: str, lang: Optional[str] = 'en', vttType="vtt") -> Optional[str]:
    """Fetch subtitle text for a video without downloading the media.

    Manually provided subtitles (``info_dict["subtitles"]``) are tried
    first; automatic captions (``info_dict["automatic_captions"]``) are
    the fallback, including when the manual subtitles have no track in
    the requested format.

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.
        lang: Preferred language code; language fallback is delegated to
            ``getVttUrlFromSubtitles``. A falsy value requests no
            specific language from yt-dlp.
        vttType: Subtitle format extension to look for.

    Returns:
        The decoded subtitle document, or ``None`` when neither source
        offers a track in the requested format.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None

        # Same lookup for both sources, in priority order.
        for source in ("subtitles", "automatic_captions"):
            tracks = info_dict.get(source)
            if not tracks:
                continue
            subtitle_url = getVttUrlFromSubtitles(tracks, lang, vttType)
            if subtitle_url is None:
                # No usable format here; don't pass None to urlopen,
                # try the next source instead.
                continue
            with ydl.urlopen(subtitle_url) as subtitle:
                return subtitle.read().decode()
    return None
def fetchSubtitleUrls(url: str) -> Optional[dict]:
    """Return the subtitle track listing yt-dlp reports for a video.

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.

    Returns:
        ``info_dict["subtitles"]`` when it is non-empty and contains
        something other than a lone ``"live_chat"`` entry; otherwise
        ``info_dict["automatic_captions"]`` when that is non-empty;
        otherwise ``None``.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "skip_download": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        if not info_dict:
            return None

        subtitles = info_dict.get("subtitles")
        if subtitles:
            # A mapping whose only key is "live_chat" is skipped in
            # favour of automatic captions.
            only_live_chat = len(subtitles) == 1 and "live_chat" in subtitles
            if not only_live_chat:
                return subtitles

        automatic = info_dict.get("automatic_captions")
        if automatic:
            return automatic
    return None
def get_subtitle(url, lang='en'):
    """Download a video's subtitles to disk and return them without tags.

    yt-dlp is run with ``outtmpl='%(id)s.%(ext)s'`` and the media
    download skipped; the function then reads ``<id>.<lang>.vtt`` from
    the current working directory (the file is not removed afterwards)
    and strips every ``<...>`` markup span from its content.

    Args:
        url: Video URL handed to ``yt_dlp.YoutubeDL.extract_info``.
        lang: Subtitle language code; ``None`` is treated as ``'en'``.

    Returns:
        The subtitle text with ``<...>`` spans removed, or ``None`` when
        the video id is unavailable or any step fails (the error is
        printed, not raised).
    """
    if lang is None:
        lang = 'en'

    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        'skip_download': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            video_id = info_dict.get("id", None)
            if video_id is None:
                return None

            subtitle_file = f"{video_id}.{lang}.vtt"
            # WebVTT is UTF-8 by specification; don't rely on the
            # platform's default encoding.
            with open(subtitle_file, 'r', encoding='utf-8') as f:
                subtitle_content = f.read()

            # Drop inline markup such as <c> and <00:00:01.000> cue tags.
            return re.sub(r"<[^>]+>", "", subtitle_content)
    except Exception as error:
        # Best-effort API: report the problem and signal failure with None.
        print(error)
        return None