# Spaces: Build error
import gradio as gr | |
import whisper | |
from transformers import MarianMTModel, MarianTokenizer | |
import subprocess | |
import os | |
# Load models
def load_models():
    """Load the Whisper model and the MarianMT translation models/tokenizers.

    Populates three module-level globals:

    * ``whisper_model`` -- the Whisper "base" speech-to-text model.
    * ``translation_models`` -- MarianMT models keyed by source language code.
    * ``translation_tokenizers`` -- one tokenizer per model, stored under the
      same keys and loaded from the same checkpoint as that model.
    """
    global whisper_model, translation_models, translation_tokenizers

    # A single checkpoint name per source language, shared by the model and
    # its tokenizer. Deriving the tokenizer name separately as
    # "opus-mt-{lang}-en" requested "opus-mt-en-en" for the "en" key, which is
    # not the checkpoint that key's model is loaded from.
    checkpoints = {
        "en": "Helsinki-NLP/opus-mt-en-es",
        "es": "Helsinki-NLP/opus-mt-es-en",
        "fr": "Helsinki-NLP/opus-mt-fr-en",
        "ar": "Helsinki-NLP/opus-mt-ar-en",
    }

    whisper_model = whisper.load_model("base")  # Whisper model
    translation_models = {
        lang: MarianMTModel.from_pretrained(name)
        for lang, name in checkpoints.items()
    }
    translation_tokenizers = {
        lang: MarianTokenizer.from_pretrained(name)
        for lang, name in checkpoints.items()
    }


# Models are loaded once, at import time, so the handlers below can use them.
load_models()
# Transcribe function
def transcribe_audio(file, language="en"):
    """Transcribe an audio input with the global Whisper model.

    Args:
        file: The audio to transcribe. Either a value that can be passed
            straight to ``whisper_model.transcribe`` (e.g. a file path), or
            an upload wrapper that exposes the path as a string ``name``
            attribute. ``None`` means nothing was uploaded.
        language: Language code forwarded to Whisper.

    Returns:
        The transcribed text on success. On any failure, a string of the
        form ``"Error: <message>"`` (errors are reported, never raised).
    """
    # An empty upload widget hands over None; report that plainly instead of
    # surfacing whatever internal error Whisper raises for it.
    if file is None:
        return "Error: no audio file provided."

    # Upload wrappers (temp-file objects) carry the on-disk path in ``.name``.
    # Only a string ``name`` is unwrapped so other inputs pass through as-is.
    path = getattr(file, "name", None)
    if isinstance(path, str):
        file = path

    try:
        result = whisper_model.transcribe(file, language=language)
        return result["text"]
    except Exception as e:
        return f"Error: {str(e)}"
# Translate function
def translate_text(text, source_lang, target_lang="en"):
    """Translate ``text`` using the MarianMT model registered for ``source_lang``.

    Args:
        text: Text to translate.
        source_lang: Key into the global ``translation_models`` /
            ``translation_tokenizers`` dictionaries.
        target_lang: Requested target language; only ``"en"`` is accepted.

    Returns:
        * ``""`` when ``text`` is empty or whitespace-only (nothing to do).
        * ``"Unsupported language."`` when ``target_lang`` is not ``"en"`` or
          ``source_lang`` has no registered model.
        * The decoded translation on success.
        * ``"Error: <message>"`` if anything raises along the way.
    """
    try:
        # Do not run the model on blank input; there is nothing to translate.
        if not text or not text.strip():
            return ""

        # The cheap target check goes first so it short-circuits the lookup.
        if target_lang != "en" or source_lang not in translation_models:
            return "Unsupported language."

        # NOTE(review): the "en" entry is loaded from an en-es checkpoint, so
        # English input presumably comes back in Spanish even though the
        # target is "en" -- confirm this is intended.
        tokenizer = translation_tokenizers[source_lang]
        model = translation_models[source_lang]

        inputs = tokenizer(text, return_tensors="pt", padding=True)
        translated_tokens = model.generate(**inputs)
        # A single input string yields a batch of one; decode its only row.
        return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {str(e)}"
# Text-to-Speech function
def text_to_speech(text, speaker="male", speed="normal"):
    """Synthesize ``text`` into a WAV file by invoking the ``tts`` command.

    Args:
        text: The text to speak.
        speaker: Accepted for interface compatibility; not used here.
        speed: Accepted for interface compatibility; not used here.

    Returns:
        ``"output.wav"`` when the command succeeds, otherwise a string of
        the form ``"Error: <message>"``.
    """
    wav_path = "output.wav"
    try:
        command = ["tts"]
        command.append(f"--text={text}")
        command.append("--model_name=tts_models/en/ljspeech/tacotron2-DCA")
        command.append(f"--out_path={wav_path}")
        # check=True makes a non-zero exit status raise, so it is reported
        # through the error branch below.
        subprocess.run(command, check=True)
    except Exception as exc:
        return "Error: " + str(exc)
    return wav_path
# Gradio Interface
def tts_interface(text):
    """Gradio callback that turns ``text`` into a playable audio file path.

    Calls ``text_to_speech`` and returns its result only when that result is
    a string naming an existing path; otherwise returns ``None`` (this is how
    an ``"Error: ..."`` result is filtered out).
    """
    result = text_to_speech(text)
    if not isinstance(result, str):
        return None
    if not os.path.exists(result):
        return None
    return result
# UI layout: three independent sections (transcription, translation, TTS),
# each wiring one button to one of the handler functions defined above.
with gr.Blocks() as demo:
    gr.Markdown("### Audio Transcription, Translation, and TTS App")

    # Transcription section
    with gr.Row():
        with gr.Column():
            # "filepath" hands the handler a path string, which is what
            # Whisper's transcribe() needs; "file" is not a supported value
            # for gr.Audio's ``type``.
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            lang_input = gr.Dropdown(["en", "es", "fr", "ar"], label="Language", value="en")
            transcribe_btn = gr.Button("Transcribe")
            transcription_output = gr.Textbox(label="Transcription Output")
    transcribe_btn.click(transcribe_audio, inputs=[audio_input, lang_input], outputs=transcription_output)

    # Translation section
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=3)
            source_lang = gr.Dropdown(["en", "es", "fr", "ar"], label="Source Language", value="en")
            translate_btn = gr.Button("Translate")
            translation_output = gr.Textbox(label="Translation Output")
    # Only text and source language are passed; the target stays at the
    # handler's default ("en").
    translate_btn.click(translate_text, inputs=[text_input, source_lang], outputs=translation_output)

    # TTS section
    with gr.Row():
        with gr.Column():
            tts_input = gr.Textbox(label="Text for TTS", lines=2)
            tts_btn = gr.Button("Generate Audio")
            tts_output = gr.Audio(label="Generated Audio")
    tts_btn.click(tts_interface, inputs=tts_input, outputs=tts_output)

# Launch Gradio App
demo.launch()