import gradio as gr
from tqdm.asyncio import tqdm_asyncio
import os
import time
import asyncio
from concurrent.futures import ThreadPoolExecutor

from pipeline import (
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_main,
    tts_main,
    create_combined_output,
    create_combined_output_subprocess,
)
from pipeline import translation_hdr, translation_url, LANG_DICT
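
# shared thread pool for running the blocking pipeline stages
# (audio extraction, transcription, muxing) off the asyncio event loop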
executor = ThreadPoolExecutor()


async def process_video_translation(
    input_video, language, speaker, progress=gr.Progress(track_tqdm=True)
):
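    """Run the five-stage dubbing pipeline on an uploaded video.

    Extracts the audio track, transcribes and preprocesses it, translates the
    sentences through the configured translation endpoint, synthesizes speech
    in the selected language and voice, and muxes the new audio onto the
    original video. Returns the path to the dubbed output video.
    """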
    if input_video is None:
        gr.Info("Please upload a video file", duration=2)
        return

    total_stages = 5

    # timestamp the output name so repeated runs do not collide
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    output_video = f"{os.path.splitext(input_video)[0]}_dubbed_{timestamp}.mp4"

    # delete the output video if it already exists
    try:
        os.remove(output_video)
        print(f"Deleted existing output video: {output_video}")
    except FileNotFoundError:
        print(f"No existing output video found: {output_video}")

    with tqdm_asyncio(total=total_stages, desc="Processing video translation") as pbar:
        loop = asyncio.get_running_loop()

        # stage 1: extract audio from video
        progress(0.1, desc="Extracting audio from video")
        output_audio_path = await loop.run_in_executor(
            executor, extract_audio_from_video, input_video
        )
        pbar.update(1)

        # stage 2: transcribe audio
        progress(0.2, desc="Transcribing audio")
        sentences = await loop.run_in_executor(
            executor, transcribe_and_preprocess_audio, output_audio_path
        )
        pbar.update(1)

        # stage 3: translate to the selected language
        progress(0.4, desc=f"Translating to {language}")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG_DICT[language]
        )
        pbar.update(1)

        # stage 4: convert the translations to speech
        progress(0.7, desc="Converting to speech")
        output_audio = await tts_main(khaya_translations, speaker, LANG_DICT[language])
        pbar.update(1)

        # stage 5: combine the new audio with the original video
        progress(1.0, desc="Combining audio and video")
        output_video = await loop.run_in_executor(
            executor,
            create_combined_output_subprocess,
            input_video,
            output_audio,
            output_video,
        )
        pbar.update(1)

    print("Video translation completed")
    gr.Info("Video translation completed", duration=2)
    print(f"Output video: {output_video}")
    return output_video


app_theme = gr.themes.Ocean(
    text_size="lg",
    spacing_size="lg",
)


def update_speaker_choices(language):
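    """Return updated speaker options for the selected language.

    Twi offers male and female voices; Ewe currently offers only male.
    """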
    if language == "Twi":
        return gr.update(choices=["male", "female"], value="male")
    elif language == "Ewe":
        return gr.update(choices=["male"], value="male")


with gr.Blocks(
    theme=app_theme,
    title="Video Dubbing Interface",
) as demo:
    with gr.Row(variant="compact"):
        with gr.Column(
            scale=1,
            min_width=100,
        ):
            gr.Image(
                "Color.png",
                show_label=False,
                height=100,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )
        with gr.Column(
            scale=3,
            variant="default",
        ):
            gr.HTML(
                """
                <div style="display: flex; align-items: center; justify-content: center;">
                    <h1 style="font-size: 2em; font-weight: bold; margin-top: 1em;">
                        African Language Video Dubbing POC
                    </h1>
                </div>
                """,
            )
        with gr.Column(
            scale=1,
            min_width=100,
        ):
            gr.Image(
                "NLPGhana_logo_1.png",
                show_label=False,
                height=100,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
                show_share_button=False,
            )

    gr.HTML("<hr style='margin-top: 0.5em;'>")
    gr.HTML("<div style='height: 20px;'></div>")

    # main interface components
    with gr.Row():
        with gr.Column():
            input_video = gr.Video(label="Input Video", sources=["upload"])
            input_language = gr.Radio(
                label="Select Language",
                choices=["Twi", "Ewe"],
                value="Twi",
                min_width=50,
                container=True,
                show_label=True,
            )
            speaker_choices = (
                ["male", "female"] if input_language.value == "Twi" else ["male"]
            )
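            # the default speaker options match the default language ("Twi");
            # the change handler below keeps them in sync afterwards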
            input_speaker = gr.Radio(
                label="Select Speaker",
                choices=speaker_choices,
                value="male",
                min_width=50,
                container=True,
                show_label=True,
            )
            submit = gr.Button("Process Video", scale=1)
        output_video = gr.Video(label="Processed Video")

    # update the speaker choices based on the selected language
    input_language.change(
        update_speaker_choices,
        inputs=input_language,
        outputs=input_speaker,
    )
    submit.click(
        process_video_translation,
        inputs=[input_video, input_language, input_speaker],
        outputs=output_video,
    )
    gr.HTML("<div style='height: 10px;'></div>")

# launch the interface
demo.launch(debug=True)
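# note: demo.launch(debug=True, share=True) would additionally create a
# temporary public link, which can help when testing outside Hugging Face Spaces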