# demo-dubbing / app.py
# (Hugging Face Space listing header preserved as a comment; it was scraped
#  page residue — author: Lagyamfi, commit fc9492f:
#  "add new logos and fix display on mobile".)
import gradio as gr
from tqdm.asyncio import tqdm_asyncio
import os
import time
import asyncio
from concurrent.futures import ThreadPoolExecutor
from pipeline import (
extract_audio_from_video,
transcribe_and_preprocess_audio,
translation_main,
tts_main,
create_combined_output,
create_combined_output_subprocess,
)
from pipeline import translation_hdr, translation_url, LANG_DICT
executor = ThreadPoolExecutor()
async def process_video_translation(
    input_video, language, speaker, progress=gr.Progress(track_tqdm=True)
):
    """Run the full dubbing pipeline on an uploaded video.

    Stages: extract audio -> transcribe -> translate -> synthesize speech ->
    mux the new audio back onto the original video. Blocking stages run in
    the module-level ``executor``; async stages are awaited directly.

    Args:
        input_video: Path to the uploaded video file (or ``None`` if the
            user clicked submit without uploading).
        language: Target language label; must be a key of ``LANG_DICT``.
        speaker: TTS voice selector ("male"/"female").
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Path to the dubbed output video, or ``None`` when no input was given.
    """
    if input_video is None:
        gr.Info("Please upload a video file", duration=2)
        return

    total_stages = 5
    # Timestamp suffix keeps repeated runs from overwriting each other.
    # NOTE(review): "%M%S" is minute+second only — collisions are possible
    # across hours; confirm this is acceptable for the POC.
    timestamp = time.strftime("%M%S")
    # splitext is robust to dots elsewhere in the path, unlike the previous
    # input_video.split('.')[0], which truncated at the FIRST dot.
    base, _ext = os.path.splitext(input_video)
    output_video = f"{base}_dubbed_{timestamp}.mp4"

    # Best-effort removal of a stale output file with the same name.
    try:
        os.remove(output_video)
        print(f"Deleted existing output video: {output_video}")
    except FileNotFoundError:
        print(f"No existing output video found: {output_video}")

    # get_running_loop() is the non-deprecated form inside a coroutine;
    # hoisted once instead of re-fetching per stage.
    loop = asyncio.get_running_loop()

    with tqdm_asyncio(total=total_stages, desc="Processing video translation") as pbar:
        # stage 1: extract audio from video (blocking -> thread pool)
        progress(0.1, desc="Extracting audio from video")
        output_audio_path = await loop.run_in_executor(
            executor, extract_audio_from_video, input_video
        )
        pbar.update(1)

        # stage 2: transcribe audio (blocking -> thread pool)
        progress(0.2, desc="Transcribing audio")
        sentences = await loop.run_in_executor(
            executor, transcribe_and_preprocess_audio, output_audio_path
        )
        pbar.update(1)

        # stage 3: translate to the target language
        progress(0.4, desc="Translating to Twi")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG_DICT[language]
        )
        pbar.update(1)

        # stage 4: convert translations to speech
        progress(0.7, desc="Converting to speech")
        output_audio = await tts_main(khaya_translations, speaker, LANG_DICT[language])
        pbar.update(1)

        # stage 5: mux the synthesized audio onto the original video
        progress(1.0, desc="Combining audio and video")
        output_video = await loop.run_in_executor(
            executor,
            create_combined_output_subprocess,
            input_video,
            output_audio,
            output_video,
        )
        pbar.update(1)

    print("Video translation completed")
    gr.Info("Video translation completed", duration=2)
    print(f"Output video: {output_video}")
    return output_video
app_theme = gr.themes.Ocean(
text_size="lg",
spacing_size="lg",
)
def update_speaker_choices(language):
    """Return a component update restricting speaker voices by language.

    Twi offers both male and female voices; Ewe (and any language added
    later without an explicit entry) offers male only. Previously an
    unrecognised language fell through and returned ``None``, which would
    hand an invalid update to the Radio component.

    Args:
        language: Selected language label, e.g. "Twi" or "Ewe".

    Returns:
        A ``gr.update`` with the allowed ``choices`` and ``value="male"``.
    """
    choices = ["male", "female"] if language == "Twi" else ["male"]
    return gr.update(choices=choices, value="male")
# --- UI definition -------------------------------------------------------
with gr.Blocks(
    theme=app_theme,
    title="Video Dubbing Interface",
) as demo:
    # Header row: partner logos flanking the page title.
    with gr.Row(variant="compact"):
        with gr.Column(
            scale=1,
            min_width=100,
        ):
            gr.Image(
                "Color.png",
                show_label=False,
                height=100,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )
        with gr.Column(
            scale=3,
            variant="default",
        ):
            gr.HTML(
                """
                <div style="display: flex; align-items: center; justify-content: center;">
                    <h1 style="font-size: 2em; font-weight: bold; margin-top: 1em;">
                        African Language Video Dubbing POC
                    </h1>
                </div>
                """,
            )
        with gr.Column(
            scale=1,
            min_width=100,
        ):
            gr.Image(
                "NLPGhana_logo_1.png",
                show_label=False,
                height=100,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )
    gr.HTML("<hr style='margin-top: 0.5em;'>")
    gr.HTML("<div style='height: 20px;'></div>")

    # main interface components
    with gr.Row():
        with gr.Column():
            input_video = gr.Video(label="Input Video", sources=["upload"])
            input_language = gr.Radio(
                label="Select Language",
                choices=["Twi", "Ewe"],
                value="Twi",
                min_width=50,
                container=True,
                show_label=True,
            )
            # Initial speaker choices mirror the default language ("Twi");
            # kept in sync afterwards by the input_language.change handler.
            # (A stray debug print of input_language.value was removed here.)
            speaker_choices = (
                ["male", "female"] if input_language.value == "Twi" else ["male"]
            )
            input_speaker = gr.Radio(
                label="Select Speaker",
                choices=speaker_choices,
                value="male",
                min_width=50,
                container=True,
                show_label=True,
            )
            submit = gr.Button("Process Video", scale=1)
        # NOTE(review): original indentation was lost in extraction; the
        # output player is assumed to sit beside the input column — confirm.
        output_video = gr.Video(label="Processed Video")

    # Update the speaker choices based on the selected language
    input_language.change(
        update_speaker_choices,
        inputs=input_language,
        outputs=input_speaker,
    )
    submit.click(
        process_video_translation,
        inputs=[input_video, input_language, input_speaker],
        outputs=output_video,
    )
    gr.HTML("<div style='height: 10px;'></div>")

# Launch the interface
demo.launch(debug=True)