Spaces:
Sleeping
Sleeping
File size: 4,285 Bytes
fb456bc d839cfe 3424983 f705352 3424983 fb456bc d839cfe f705352 3424983 60c9382 3424983 60c9382 3424983 60c9382 3424983 d839cfe 43f5428 d839cfe c1eeef2 60c9382 c1eeef2 d839cfe a6f578a d839cfe fb456bc 43f5428 13134f5 fb456bc d839cfe fb456bc b8e519c fb456bc b8e519c 3424983 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import gradio as gr
import whisper
from pytube import YouTube
from fastapi import FastAPI, Response, Request
import yt_dlp
import uvicorn
CUSTOM_PATH = "/gradio"
app = FastAPI()
langs = ["None"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
model_size = list(whisper._MODELS.keys())
@app.get("/")
def read_main():
return {"message": "This is your main app"}
#async def get_subtitle(url: str):
# Download the subtitle with download_subtitle()
#subtitle_url = download_subtitle(url)
# Stream the subtitle as a response
#return StreamingResponse(requests.get(subtitle_url, stream=True).iter_content(chunk_size=1024))
@app.get("/subtitle")
async def get_subtitle(url, lang='en'):
# Download subtitles if available
ydl_opts = {
'writesubtitles': True,
'outtmpl': '%(id)s.%(ext)s',
'subtitleslangs': [lang],
'skip_download': True,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info_dict = ydl.extract_info(url, download=True)
video_id = info_dict.get("id", None)
if video_id is None:
return None
subtitle_file = f"{video_id}.{lang}.vtt"
with open(subtitle_file, 'r') as f:
subtitle_content = f.read()
subtitle_content = re.sub(r"<[^>]+>", "", subtitle_content)
return subtitle_content
return None
def download_audio(video_url, quality: str = '128', speed: float = None):
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': '%(title)s.%(ext)s',
'quiet': True,
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3', #'opus',
'preferredquality': quality,
}]
}
if speed:
ydl_opts['postprocessors'].append({
'key': 'FFmpegFilter',
'filter_complex': f"atempo={speed}"
})
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([video_url])
audio_file = ydl.prepare_filename(ydl.extract_info(video_url, download=False))
print('audio_file', audio_file)
return audio_file
def get_audio(url):
yt = YouTube(url)
return yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
def get_transcript(url, model_size, lang, format):
subtitle = get_subtitle(url, lang)
print(subtitle)
return subtitle
model = whisper.load_model(model_size)
if lang == "None":
lang = None
result = model.transcribe(download_audio(url), fp16=False, language=lang)
if format == "None":
return result["text"]
elif format == ".srt":
return format_to_srt(result["segments"])
def format_to_srt(segments):
output = ""
for i, segment in enumerate(segments):
output += f"{i + 1}\n"
output += f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
output += f"{segment['text']}\n\n"
return output
def format_timestamp(t):
hh = t//3600
mm = (t - hh*3600)//60
ss = t - hh*3600 - mm*60
mi = (t - int(t))*1000
return f"{int(hh):02d}:{int(mm):02d}:{int(ss):02d},{int(mi):03d}"
with gr.Blocks() as demo:
with gr.Row():
with gr.Column():
with gr.Row():
url = gr.Textbox(placeholder='Youtube video URL', label='URL')
with gr.Row():
model_size = gr.Dropdown(choices=model_size, value='tiny', label="Model")
lang = gr.Dropdown(choices=langs, value="None", label="Language (Optional)")
format = gr.Dropdown(choices=["None", ".srt"], value="None", label="Timestamps? (Optional)")
with gr.Row():
gr.Markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")
transcribe_btn = gr.Button('Transcribe')
with gr.Column():
outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
transcribe_btn.click(get_transcript, inputs=[url, model_size, lang, format], outputs=outputs)
demo.launch(debug=True)
#io = gr.Interface(gradio_interface)
#app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH)
uvicorn.run(app, host="0.0.0.0", port=7860) |