# demo-dubbing / app.py — first draft of the dubbing frontend (c59a3c0)
from pathlib import Path

import gradio as gr
from tqdm import tqdm

from pipeline import (
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_main,
    tts_main,
    combine_audio_streams,
    create_combined_output,
)
from pipeline import translation_hdr, translation_url, LANG
async def process_video_translation(
    input_video, speaker, progress=gr.Progress(track_tqdm=True)
):
    """Run the full dubbing pipeline on an uploaded video.

    Stages: extract audio -> transcribe -> translate (Twi) -> TTS ->
    combine audio chunks -> mux audio back onto the video.

    Args:
        input_video: Filesystem path of the uploaded video (string from
            the Gradio ``Video`` component).
        speaker: TTS voice selection ("male" or "female" per the UI radio).
        progress: Gradio progress tracker; ``track_tqdm=True`` mirrors the
            tqdm bar below into the web UI.

    Returns:
        Path of the translated (dubbed) output video file.
    """
    total_stages = 6
    # BUGFIX: the previous `input_video.split('.')[0]` cut the path at the
    # FIRST dot anywhere in it, which mangles Gradio temp paths such as
    # /tmp/gradio/a1b2.c3d4/clip.mp4. with_suffix('') strips only the
    # final extension.
    output_video = f"{Path(input_video).with_suffix('')}_translated.mp4"
    with tqdm(total=total_stages, desc="Processing video translation") as pbar:
        progress(0.1, desc="Extracting audio from video")
        pbar.update(1)
        output_audio_path = extract_audio_from_video(input_video)

        # transcribe audio
        pbar.set_description("Transcribing audio")
        pbar.update(1)
        sentences = transcribe_and_preprocess_audio(output_audio_path)

        # translate to Twi (async: translation_main batches requests
        # against the remote translation endpoint)
        pbar.set_description("Translating to Twi")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG
        )

        # one output wav per translated sentence, consumed by tts_main
        print("Creating output files")
        list_of_output_chunks = [
            f"translated_{i}.wav" for i in range(len(khaya_translations))
        ]
        pbar.update(1)

        # convert to speech
        pbar.set_description("Converting to speech")
        await tts_main(khaya_translations, speaker, list_of_output_chunks)
        pbar.update(1)

        # combine per-sentence audio chunks into one track
        print("Combining audio streams")
        pbar.set_description("Combining audio streams")
        output_audio = combine_audio_streams(
            list_of_output_chunks, "combined_audio.wav"
        )
        pbar.update(1)

        # mux the combined audio back onto the original video
        pbar.set_description("Combining audio and video")
        create_combined_output(input_video, output_audio, output_video)
        pbar.update(1)

    print("Video translation completed")
    gr.Info("Video translation completed", duration=2)

    return output_video
# UI definition. The nested `with` blocks mirror the rendered layout:
# a 3-column header row (logo | title | logo), then the main row with
# input video + speaker choice + output video, then the submit button.
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="Video Dubbing Interface",
) as demo:
    # header row: left logo (1), centered title (2), right logo (1)
    with gr.Row(variant="default"):
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            # left logo — static image, all chrome disabled
            gr.Image(
                "logo_2.jpeg",
                show_label=False,
                width=150,
                height=150,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
            )
        with gr.Column(
            scale=2,
        ):
            gr.Markdown("# Video Dubbing Interface", height=100)
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            # right logo (NLPGhana)
            gr.Image(
                "NLPGhana_logo_2.png",
                show_label=False,
                width=50,
                height=150,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
            )

    # main interface components
    with gr.Row():
        input_video = gr.Video(label="Input Video", sources=["upload"])
        # speaker choice feeds straight into the TTS stage of the pipeline
        input_speaker = gr.Radio(
            label="Select Speaker",
            choices=["male", "female"],
            value="female",
            min_width=50,
            container=True,
        )
        output_video = gr.Video(label="Processed Video")

    with gr.Row():
        # process video translation: the async handler returns the path
        # of the dubbed video, which Gradio renders into `output_video`
        submit = gr.Button("Process Video", scale=1)
        submit.click(
            process_video_translation,
            inputs=[input_video, input_speaker],
            outputs=output_video,
        )
# Launch the interface.
# (The commented-out gr.Interface scaffold that used to live here duplicated
# the Blocks UI above and has been removed.)
# debug=True surfaces handler tracebacks in the server console.
demo.launch(debug=True)