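"""Gradio app for the African Language Video Dubbing proof of concept.

Takes an uploaded video, transcribes its audio, translates the transcript into
the selected language (Twi or Ewe), synthesizes speech for the chosen speaker,
and combines the dubbed audio with the original video.
"""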
import gradio as gr
from tqdm.asyncio import tqdm_asyncio
import os
import time
import asyncio
from concurrent.futures import ThreadPoolExecutor
from pipeline import (
extract_audio_from_video,
transcribe_and_preprocess_audio,
translation_main,
tts_main,
create_combined_output,
create_combined_output_subprocess,
)
from pipeline import translation_hdr, translation_url, LANG_DICT
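# Thread pool for offloading the blocking pipeline steps (audio extraction,
# transcription, audio/video muxing) so they do not stall the asyncio event loop.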
executor = ThreadPoolExecutor()
async def process_video_translation(
input_video, language, speaker, progress=gr.Progress(track_tqdm=True)
):
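    """Run the full dubbing pipeline on the uploaded video.

    Stages: extract the audio track, transcribe it, translate the transcript
    into the selected language, synthesize speech with the chosen speaker, and
    combine the dubbed audio with the original video. Returns the path to the
    dubbed output video.
    """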
if input_video is None:
gr.Info("Please upload a video file", duration=2)
return
total_stages = 5
# add time stamp to output video
timestamp = time.strftime("%M%S")
output_video = f"{input_video.split('.')[0]}_dubbed_{timestamp}.mp4"
# delete the output video if it exists
try:
os.remove(output_video)
print(f"Deleted existing output video: {output_video}")
    except FileNotFoundError:
        print(f"No existing output video found: {output_video}")
with tqdm_asyncio(total=total_stages, desc="Processing video translation") as pbar:
# stage 1: extract audio from video
progress(0.1, desc="Extracting audio from video")
        output_audio_path = await asyncio.get_running_loop().run_in_executor(
executor, extract_audio_from_video, input_video
)
pbar.update(1)
# stage 2: transcribe audio
progress(0.2, desc="Transcribing audio")
        sentences = await asyncio.get_running_loop().run_in_executor(
executor, transcribe_and_preprocess_audio, output_audio_path
)
pbar.update(1)
# stage 3: translate to twi
progress(0.4, desc="Translating to Twi")
khaya_translations = await translation_main(
sentences, translation_url, translation_hdr, LANG_DICT[language]
)
pbar.update(1)
# stage 4: convert to speech
progress(0.7, desc="Converting to speech")
output_audio = await tts_main(khaya_translations, speaker, LANG_DICT[language])
# print(tts_output_files)
pbar.update(1)
# stage 5: combine audio streams
progress(1.0, desc="Combining audio and video")
        output_video = await asyncio.get_running_loop().run_in_executor(
executor,
create_combined_output_subprocess,
input_video,
output_audio,
output_video,
)
pbar.update(1)
print("Video translation completed")
gr.Info(f"Video translation completed", duration=2)
print(f"Output video: {output_video}")
return output_video
app_theme = gr.themes.Ocean(
text_size="lg",
spacing_size="lg",
)
def update_speaker_choices(language):
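    """Update the speaker radio to the voices available for the selected language.

    Twi currently offers male and female voices; Ewe offers only a male voice.
    """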
if language == "Twi":
return gr.update(choices=["male", "female"], value="male")
elif language == "Ewe":
return gr.update(choices=["male"], value="male")
with gr.Blocks(
theme=app_theme,
title="Video Dubbing Interface",
) as demo:
with gr.Row(variant="compact"):
with gr.Column(
scale=1,
min_width=100,
):
gr.Image(
"Color.png",
show_label=False,
height=100,
show_download_button=False,
show_fullscreen_button=False,
container=False,
show_share_button=False,
# min_width=100,
# scale=1,
)
with gr.Column(
scale=3,
variant="default",
):
gr.HTML(
"""
<div style="display: flex; align-items: center; justify-content: center;">
<h1 style="font-size: 2em; font-weight: bold; margin-top: 1em;">
African Language Video Dubbing POC
</h1>
</div>
""",
)
with gr.Column(
scale=1,
min_width=100,
):
gr.Image(
"NLPGhana_logo_1.png",
show_label=False,
height=100,
show_download_button=False,
show_fullscreen_button=False,
container=False,
show_share_button=False,
)
gr.HTML("<hr style='margin-top: 0.5em;'>")
gr.HTML("<div style='height: 20px;'></div>")
# main interface components
with gr.Row():
with gr.Column():
input_video = gr.Video(label="Input Video", sources=["upload"])
input_language = gr.Radio(
label="Select Language",
choices=["Twi", "Ewe"],
value="Twi",
min_width=50,
container=True,
show_label=True,
)
speaker_choices = (
["male", "female"] if input_language.value == "Twi" else ["male"]
)
input_speaker = gr.Radio(
label="Select Speaker",
choices=speaker_choices,
value="male",
min_width=50,
container=True,
show_label=True,
)
submit = gr.Button("Process Video", scale=1)
output_video = gr.Video(label="Processed Video")
# Update the speaker choices based on the selected language
input_language.change(
update_speaker_choices,
inputs=input_language,
outputs=input_speaker,
)
submit.click(
process_video_translation,
inputs=[input_video, input_language, input_speaker],
outputs=output_video,
)
gr.HTML("<div style='height: 10px;'></div>")
# Launch the interface
demo.launch(debug=True)