import gradio as gr
from tqdm.asyncio import tqdm_asyncio
import os
import time
import asyncio
from concurrent.futures import ThreadPoolExecutor

from pipeline import (
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_main,
    tts_main,
    create_combined_output,
    create_combined_output_subprocess,
)
from pipeline import translation_hdr, translation_url, LANG_DICT
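
# shared thread pool for running the blocking pipeline stages
# (audio extraction, transcription, muxing) off the asyncio event loop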
executor = ThreadPoolExecutor()


async def process_video_translation(
    input_video, language, speaker, progress=gr.Progress(track_tqdm=True)
):
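    """Run the five-stage dubbing pipeline on an uploaded video.

    Extracts the audio track, transcribes and preprocesses it, translates the
    sentences through the configured translation endpoint, synthesizes speech
    in the selected language and voice, and muxes the new audio onto the
    original video. Returns the path to the dubbed output video.
    """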
    if input_video is None:
        gr.Info("Please upload a video file", duration=2)
        return

    total_stages = 5

    # timestamp the output name so repeated runs do not collide
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    output_video = f"{os.path.splitext(input_video)[0]}_dubbed_{timestamp}.mp4"

    # delete the output video if it already exists
    try:
        os.remove(output_video)
        print(f"Deleted existing output video: {output_video}")
    except FileNotFoundError:
        print(f"No existing output video found: {output_video}")

    with tqdm_asyncio(total=total_stages, desc="Processing video translation") as pbar:
        loop = asyncio.get_running_loop()

        # stage 1: extract audio from video
        progress(0.1, desc="Extracting audio from video")
        output_audio_path = await loop.run_in_executor(
            executor, extract_audio_from_video, input_video
        )
        pbar.update(1)

        # stage 2: transcribe audio
        progress(0.2, desc="Transcribing audio")
        sentences = await loop.run_in_executor(
            executor, transcribe_and_preprocess_audio, output_audio_path
        )
        pbar.update(1)

        # stage 3: translate to the selected language
        progress(0.4, desc=f"Translating to {language}")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG_DICT[language]
        )
        pbar.update(1)

        # stage 4: convert the translations to speech
        progress(0.7, desc="Converting to speech")
        output_audio = await tts_main(khaya_translations, speaker, LANG_DICT[language])
        pbar.update(1)

        # stage 5: combine the new audio with the original video
        progress(1.0, desc="Combining audio and video")
        output_video = await loop.run_in_executor(
            executor,
            create_combined_output_subprocess,
            input_video,
            output_audio,
            output_video,
        )
        pbar.update(1)

    print("Video translation completed")
    gr.Info("Video translation completed", duration=2)
    print(f"Output video: {output_video}")
    return output_video


app_theme = gr.themes.Ocean(
    text_size="lg",
    spacing_size="lg",
)


def update_speaker_choices(language):
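    """Return updated speaker options for the selected language.

    Twi offers male and female voices; Ewe currently offers only male.
    """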
    if language == "Twi":
        return gr.update(choices=["male", "female"], value="male")
    elif language == "Ewe":
        return gr.update(choices=["male"], value="male")


with gr.Blocks(
    theme=app_theme,
    title="Video Dubbing Interface",
) as demo:
    with gr.Row(variant="compact"):
        with gr.Column(
            scale=1,
            min_width=100,
        ):
            gr.Image(
                "Color.png",
                show_label=False,
                height=100,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )
        with gr.Column(
            scale=3,
            variant="default",
        ):
            gr.HTML(
                """
                <div style="display: flex; align-items: center; justify-content: center;">
                    <h1 style="font-size: 2em; font-weight: bold; margin-top: 1em;">
                        African Language Video Dubbing POC
                    </h1>
                </div>
                """,
            )
        with gr.Column(
            scale=1,
            min_width=100,
        ):
            gr.Image(
                "NLPGhana_logo_1.png",
                show_label=False,
                height=100,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )

    gr.HTML("<hr style='margin-top: 0.5em;'>")
    gr.HTML("<div style='height: 20px;'></div>")

    # main interface components
    with gr.Row():
        with gr.Column():
            input_video = gr.Video(label="Input Video", sources=["upload"])
            input_language = gr.Radio(
                label="Select Language",
                choices=["Twi", "Ewe"],
                value="Twi",
                min_width=50,
                container=True,
                show_label=True,
            )
            speaker_choices = (
                ["male", "female"] if input_language.value == "Twi" else ["male"]
            )
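            # the default speaker options match the default language ("Twi");
            # the change handler below keeps them in sync afterwards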
            input_speaker = gr.Radio(
                label="Select Speaker",
                choices=speaker_choices,
                value="male",
                min_width=50,
                container=True,
                show_label=True,
            )
            submit = gr.Button("Process Video", scale=1)
        output_video = gr.Video(label="Processed Video")

    # update the speaker choices based on the selected language
    input_language.change(
        update_speaker_choices,
        inputs=input_language,
        outputs=input_speaker,
    )
    submit.click(
        process_video_translation,
        inputs=[input_video, input_language, input_speaker],
        outputs=output_video,
    )
    gr.HTML("<div style='height: 10px;'></div>")

# launch the interface
demo.launch(debug=True)
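# note: demo.launch(debug=True, share=True) would additionally create a
# temporary public link, which can help when testing outside Hugging Face Spaces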