transcriptionV3 / app.py
KIMOSSINO's picture
Update app.py
6bea046 verified
import os
import subprocess
import tempfile

import gradio as gr
import whisper
from transformers import MarianMTModel, MarianTokenizer
# Load models
# Single source of truth for the translation checkpoints, keyed by source
# language code. The model and its tokenizer are both loaded from the same
# name so they can never get out of sync.
# NOTE: the "en" entry is an English -> Spanish checkpoint; every other entry
# translates into English.
TRANSLATION_MODEL_NAMES = {
    "en": "Helsinki-NLP/opus-mt-en-es",
    "es": "Helsinki-NLP/opus-mt-es-en",
    "fr": "Helsinki-NLP/opus-mt-fr-en",
    "ar": "Helsinki-NLP/opus-mt-ar-en",
}


def load_models():
    """Load the Whisper model and the translation models/tokenizers.

    Populates three module-level globals:

    * ``whisper_model`` -- the Whisper "base" speech-recognition model.
    * ``translation_models`` -- ``{lang: MarianMTModel}``.
    * ``translation_tokenizers`` -- ``{lang: MarianTokenizer}``.

    Each tokenizer is loaded from the same checkpoint name as its model.
    Previously the tokenizer name was built as ``opus-mt-{lang}-en``, which
    for "en" requested ``opus-mt-en-en`` while the model was ``opus-mt-en-es``.
    """
    global whisper_model, translation_models, translation_tokenizers
    whisper_model = whisper.load_model("base")
    translation_models = {
        lang: MarianMTModel.from_pretrained(checkpoint)
        for lang, checkpoint in TRANSLATION_MODEL_NAMES.items()
    }
    translation_tokenizers = {
        lang: MarianTokenizer.from_pretrained(checkpoint)
        for lang, checkpoint in TRANSLATION_MODEL_NAMES.items()
    }
# Load every model once at import time; the handler functions below read the
# globals that load_models() sets.
load_models()
# Transcribe function
def transcribe_audio(file, language="en"):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        file: Path to the audio file, or an object exposing the path as
            ``.name`` (a temp-file wrapper, as some Gradio audio input modes
            may provide). ``None`` means nothing was uploaded.
        language: Language code passed through to Whisper.

    Returns:
        The transcribed text, or a string starting with ``"Error: "`` when
        no file was given or transcription failed.
    """
    if file is None:
        return "Error: no audio file provided."
    # Whisper needs a path (or array), not a file object: unwrap ``.name``
    # when present, otherwise pass the value through unchanged.
    audio_path = getattr(file, "name", file)
    try:
        result = whisper_model.transcribe(audio_path, language=language)
        return result["text"]
    except Exception as e:
        # Errors are returned as text so the UI can show them in the textbox.
        return f"Error: {str(e)}"
# Translate function
def translate_text(text, source_lang, target_lang="en"):
    """Translate ``text`` using the model registered for ``source_lang``.

    Args:
        text: Text to translate. Empty or whitespace-only input returns an
            empty string without running the model.
        source_lang: Key into the module-level ``translation_models`` /
            ``translation_tokenizers`` dicts.
        target_lang: Only ``"en"`` is accepted; any other value is rejected.

    Returns:
        The translated text, ``"Unsupported language."`` for an unknown
        source or non-"en" target, or a string starting with ``"Error: "``
        if translation raised.
    """
    if not text or not text.strip():
        return ""
    try:
        # Check the target first: it needs no loaded models.
        if target_lang != "en" or source_lang not in translation_models:
            return "Unsupported language."
        tokenizer = translation_tokenizers[source_lang]
        model = translation_models[source_lang]
        # truncation=True keeps long inputs within the model's maximum
        # length instead of failing inside generate().
        inputs = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        translated_tokens = model.generate(**inputs)
        return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    except Exception as e:
        # Errors are returned as text so the UI can show them in the textbox.
        return f"Error: {str(e)}"
# Text-to-Speech function
def text_to_speech(text, speaker="male", speed="normal"):
    """Synthesize ``text`` to a WAV file by running the ``tts`` command.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected
            without starting the subprocess.
        speaker: Currently unused; kept for interface compatibility.
        speed: Currently unused; kept for interface compatibility.

    Returns:
        Path to the generated WAV file, or a string starting with
        ``"Error: "`` on failure.
    """
    if not text or not text.strip():
        return "Error: no text provided."
    output_file = None
    try:
        # A unique file per call so concurrent requests cannot overwrite
        # each other's audio (the old fixed "output.wav" was shared).
        fd, output_file = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        tts_command = [
            "tts",
            f"--text={text}",
            "--model_name=tts_models/en/ljspeech/tacotron2-DCA",
            f"--out_path={output_file}",
        ]
        # Argument list with no shell, so the text is never shell-interpreted.
        subprocess.run(tts_command, check=True)
        return output_file
    except Exception as e:
        # Remove the placeholder file so failed runs leave nothing behind.
        if output_file and os.path.exists(output_file):
            os.remove(output_file)
        return f"Error: {str(e)}"
# Gradio Interface
def tts_interface(text):
    """Gradio handler: synthesize ``text`` and return the audio path.

    Returns the path from ``text_to_speech`` only when it is a string naming
    an existing file; otherwise returns ``None``.
    """
    result = text_to_speech(text)
    if not isinstance(result, str):
        return None
    if not os.path.exists(result):
        return None
    return result
# Build the three-section UI: transcription, translation, text-to-speech.
with gr.Blocks() as demo:
    gr.Markdown("### Audio Transcription, Translation, and TTS App")

    # Transcription section
    with gr.Row():
        with gr.Column():
            # "filepath" gives the handler a plain path string, which is what
            # Whisper's transcribe() expects ("file" passed a file object).
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            lang_input = gr.Dropdown(["en", "es", "fr", "ar"], label="Language", value="en")
            transcribe_btn = gr.Button("Transcribe")
        transcription_output = gr.Textbox(label="Transcription Output")
    transcribe_btn.click(
        transcribe_audio,
        inputs=[audio_input, lang_input],
        outputs=transcription_output,
    )

    # Translation section
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=3)
            source_lang = gr.Dropdown(["en", "es", "fr", "ar"], label="Source Language", value="en")
            translate_btn = gr.Button("Translate")
        translation_output = gr.Textbox(label="Translation Output")
    # target_lang is not wired to the UI; translate_text uses its default.
    translate_btn.click(
        translate_text,
        inputs=[text_input, source_lang],
        outputs=translation_output,
    )

    # TTS section
    with gr.Row():
        with gr.Column():
            tts_input = gr.Textbox(label="Text for TTS", lines=2)
            tts_btn = gr.Button("Generate Audio")
        tts_output = gr.Audio(label="Generated Audio")
    tts_btn.click(tts_interface, inputs=tts_input, outputs=tts_output)

# Launch Gradio App
demo.launch()