# demo-dubbing / app.py — first draft of the dubbing frontend (c59a3c0)
from pathlib import Path

import gradio as gr
from tqdm import tqdm

from pipeline import (
    extract_audio_from_video,
    transcribe_and_preprocess_audio,
    translation_main,
    tts_main,
    combine_audio_streams,
    create_combined_output,
)
from pipeline import translation_hdr, translation_url, LANG
async def process_video_translation(
    input_video, speaker, progress=gr.Progress(track_tqdm=True)
):
    """Run the full dubbing pipeline on an uploaded video.

    Stages: extract audio -> transcribe -> translate (Twi) -> TTS ->
    combine audio chunks -> mux audio back onto the video.

    Args:
        input_video: Filesystem path of the uploaded video (string from
            the Gradio ``Video`` component).
        speaker: TTS voice selection ("male" or "female" per the UI radio).
        progress: Gradio progress tracker; ``track_tqdm=True`` mirrors the
            tqdm bar below into the web UI.

    Returns:
        Path of the translated (dubbed) output video file.
    """
    total_stages = 6
    # BUGFIX: the previous `input_video.split('.')[0]` cut the path at the
    # FIRST dot anywhere in it, which mangles Gradio temp paths such as
    # /tmp/gradio/a1b2.c3d4/clip.mp4. with_suffix('') strips only the
    # final extension.
    output_video = f"{Path(input_video).with_suffix('')}_translated.mp4"
    with tqdm(total=total_stages, desc="Processing video translation") as pbar:
        progress(0.1, desc="Extracting audio from video")
        pbar.update(1)
        output_audio_path = extract_audio_from_video(input_video)

        # transcribe audio
        pbar.set_description("Transcribing audio")
        pbar.update(1)
        sentences = transcribe_and_preprocess_audio(output_audio_path)

        # translate to Twi (async: translation_main batches requests
        # against the remote translation endpoint)
        pbar.set_description("Translating to Twi")
        khaya_translations = await translation_main(
            sentences, translation_url, translation_hdr, LANG
        )

        # one output wav per translated sentence, consumed by tts_main
        print("Creating output files")
        list_of_output_chunks = [
            f"translated_{i}.wav" for i in range(len(khaya_translations))
        ]
        pbar.update(1)

        # convert to speech
        pbar.set_description("Converting to speech")
        await tts_main(khaya_translations, speaker, list_of_output_chunks)
        pbar.update(1)

        # combine per-sentence audio chunks into one track
        print("Combining audio streams")
        pbar.set_description("Combining audio streams")
        output_audio = combine_audio_streams(
            list_of_output_chunks, "combined_audio.wav"
        )
        pbar.update(1)

        # mux the combined audio back onto the original video
        pbar.set_description("Combining audio and video")
        create_combined_output(input_video, output_audio, output_video)
        pbar.update(1)

    print("Video translation completed")
    gr.Info("Video translation completed", duration=2)

    return output_video
# UI definition. The nested `with` blocks mirror the rendered layout:
# a 3-column header row (logo | title | logo), then the main row with
# input video + speaker choice + output video, then the submit button.
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="Video Dubbing Interface",
) as demo:
    # header row: left logo (1), centered title (2), right logo (1)
    with gr.Row(variant="default"):
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            # left logo — static image, all chrome disabled
            gr.Image(
                "logo_2.jpeg",
                show_label=False,
                width=150,
                height=150,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
            )
        with gr.Column(
            scale=2,
        ):
            gr.Markdown("# Video Dubbing Interface", height=100)
        with gr.Column(
            scale=1,
            min_width=0,
        ):
            # right logo (NLPGhana)
            gr.Image(
                "NLPGhana_logo_2.png",
                show_label=False,
                width=50,
                height=150,
                show_download_button=False,
                show_fullscreen_button=False,
                container=False,
            )

    # main interface components
    with gr.Row():
        input_video = gr.Video(label="Input Video", sources=["upload"])
        # speaker choice feeds straight into the TTS stage of the pipeline
        input_speaker = gr.Radio(
            label="Select Speaker",
            choices=["male", "female"],
            value="female",
            min_width=50,
            container=True,
        )
        output_video = gr.Video(label="Processed Video")

    with gr.Row():
        # process video translation: the async handler returns the path
        # of the dubbed video, which Gradio renders into `output_video`
        submit = gr.Button("Process Video", scale=1)
        submit.click(
            process_video_translation,
            inputs=[input_video, input_speaker],
            outputs=output_video,
        )
# Launch the interface.
# (The commented-out gr.Interface scaffold that used to live here duplicated
# the Blocks UI above and has been removed.)
# debug=True surfaces handler tracebacks in the server console.
demo.launch(debug=True)