import gradio as gr
import whisper
from transformers import MarianMTModel, MarianTokenizer
import subprocess
import os

# Dependencies (approximate): gradio, openai-whisper, transformers, torch,
# sentencepiece, and the Coqui TTS package (which provides the `tts` CLI).
# Whisper also expects ffmpeg to be available on the system path.

# Load models
def load_models():
    global whisper_model, translation_models, translation_tokenizers
    whisper_model = whisper.load_model("base")  # Whisper base model for transcription
    # MarianMT models for translating Spanish, French, and Arabic into English.
    # English input needs no translation, so it has no entry here.
    translation_models = {
        "es": MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-es-en"),
        "fr": MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-fr-en"),
        "ar": MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-ar-en"),
    }
    translation_tokenizers = {
        lang: MarianTokenizer.from_pretrained(f"Helsinki-NLP/opus-mt-{lang}-en")
        for lang in translation_models
    }

load_models()

# Transcribe an uploaded audio file with Whisper
def transcribe_audio(file, language="en"):
    try:
        result = whisper_model.transcribe(file, language=language)
        return result["text"]
    except Exception as e:
        return f"Error: {str(e)}"

# Translate text into English with the matching MarianMT model
def translate_text(text, source_lang, target_lang="en"):
    try:
        if target_lang != "en":
            return "Unsupported target language."
        if source_lang == "en":
            return text  # already English, nothing to translate
        if source_lang not in translation_models:
            return "Unsupported source language."
        tokenizer = translation_tokenizers[source_lang]
        model = translation_models[source_lang]
        inputs = tokenizer(text, return_tensors="pt", padding=True)
        translated_tokens = model.generate(**inputs)
        return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {str(e)}"

# Text-to-speech via the Coqui TTS command-line tool
def text_to_speech(text):
    try:
        output_file = "output.wav"
        tts_command = [
            "tts",
            f"--text={text}",
            "--model_name=tts_models/en/ljspeech/tacotron2-DCA",
            f"--out_path={output_file}",
        ]
        subprocess.run(tts_command, check=True)
        return output_file
    except Exception as e:
        return f"Error: {str(e)}"

# Wrapper so the Gradio audio output receives a valid file path (or None on failure)
def tts_interface(text):
    audio_file = text_to_speech(text)
    return audio_file if isinstance(audio_file, str) and os.path.exists(audio_file) else None

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("### Audio Transcription, Translation, and TTS App")

    # Transcription section
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            lang_input = gr.Dropdown(["en", "es", "fr", "ar"], label="Language", value="en")
            transcribe_btn = gr.Button("Transcribe")
            transcription_output = gr.Textbox(label="Transcription Output")
            transcribe_btn.click(transcribe_audio, inputs=[audio_input, lang_input], outputs=transcription_output)

    # Translation section
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=3)
            source_lang = gr.Dropdown(["en", "es", "fr", "ar"], label="Source Language", value="en")
            translate_btn = gr.Button("Translate")
            translation_output = gr.Textbox(label="Translation Output")
            translate_btn.click(translate_text, inputs=[text_input, source_lang], outputs=translation_output)

    # TTS section
    with gr.Row():
        with gr.Column():
            tts_input = gr.Textbox(label="Text for TTS", lines=2)
            tts_btn = gr.Button("Generate Audio")
            tts_output = gr.Audio(label="Generated Audio")
            tts_btn.click(tts_interface, inputs=tts_input, outputs=tts_output)

# Launch the Gradio app
demo.launch()