# NOTE: Hugging Face Spaces page residue removed here (Space status: Paused).
import gradio as gr
from tqdm import tqdm

from pipeline import (
    LANG,
    combine_audio_streams,
    create_combined_output,
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_hdr,
    translation_main,
    translation_url,
    tts_main,
)
async def process_video_translation(
    input_video, speaker, progress=gr.Progress(track_tqdm=True)
):
    """Run the full video-dubbing pipeline and return the dubbed video path.

    Stages: extract audio -> transcribe -> translate (to Twi, via the Khaya
    API) -> text-to-speech -> combine audio chunks -> mux audio with video.

    Args:
        input_video: Path to the uploaded source video file.
        speaker: TTS voice to use ("male" or "female").
        progress: Gradio progress tracker; ``track_tqdm=True`` mirrors the
            tqdm bar below into the web UI.

    Returns:
        Path of the translated (dubbed) output video file.
    """
    total_stages = 6
    # rsplit on the LAST dot so filenames/paths containing extra dots
    # (e.g. "my.clip.mp4") are not truncated; split('.')[0] was lossy.
    output_video = f"{input_video.rsplit('.', 1)[0]}_translated.mp4"
    with tqdm(total=total_stages, desc="Processing video translation") as pbar:
        # Stage 1: extract the audio track from the video.
        progress(0.1, desc="Extracting audio from video")
        pbar.update(1)
        output_audio_path = extract_audio_from_video(input_video)

        # Stage 2: transcribe audio into sentences.
        pbar.set_description("Transcribing audio")
        pbar.update(1)
        sentences = transcribe_and_preprocess_audio(output_audio_path)

        # Stage 3: translate each sentence to Twi.
        pbar.set_description("Translating to Twi")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG
        )
        # One output wav per translated sentence, indexed to keep order.
        print("Creating output files")
        list_of_output_chunks = [
            f"translated_{i}.wav" for i in range(len(khaya_translations))
        ]
        pbar.update(1)

        # Stage 4: synthesize speech for every translated sentence.
        pbar.set_description("Converting to speech")
        await tts_main(khaya_translations, speaker, list_of_output_chunks)
        pbar.update(1)

        # Stage 5: concatenate the per-sentence wav chunks.
        print("Combining audio streams")
        pbar.set_description("Combining audio streams")
        output_audio = combine_audio_streams(
            list_of_output_chunks, "combined_audio.wav"
        )
        pbar.update(1)

        # Stage 6: mux the combined audio back onto the original video.
        pbar.set_description("Combining audio and video")
        create_combined_output(input_video, output_audio, output_video)
        pbar.update(1)

    print("Video translation completed")
    # Plain string: the original was an f-string with no placeholders (F541).
    gr.Info("Video translation completed", duration=2)
    return output_video
# Build the Gradio UI: a header row (logo / title / logo), then the main
# input/output widgets, and the submit wiring. Dead commented-out
# gr.Interface code that previously followed this block has been removed.
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="Video Dubbing Interface",
) as demo:
    # Header: left logo, centered title, right logo.
    with gr.Row(variant="default"):
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            gr.Image(
                "logo_2.jpeg",
                show_label=False,
                width=150,
                height=150,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
            )
        with gr.Column(
            scale=2,
        ):
            gr.Markdown("# Video Dubbing Interface", height=100)
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            gr.Image(
                "NLPGhana_logo_2.png",
                show_label=False,
                width=50,
                height=150,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
            )

    # Main interface components: upload, voice choice, and result player.
    with gr.Row():
        input_video = gr.Video(label="Input Video", sources=["upload"])
        input_speaker = gr.Radio(
            label="Select Speaker",
            choices=["male", "female"],
            value="female",
            min_width=50,
            container=True,
        )
        output_video = gr.Video(label="Processed Video")

    with gr.Row():
        # Kick off the async translation pipeline on click.
        submit = gr.Button("Process Video", scale=1)
        submit.click(
            process_video_translation,
            inputs=[input_video, input_speaker],
            outputs=output_video,
        )

# Launch with debug enabled so tracebacks surface in the console.
demo.launch(debug=True)