Spaces:
Build error
Build error
File size: 3,690 Bytes
6bea046 d30be26 08cc981 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 d30be26 6bea046 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import gradio as gr
import whisper
from transformers import MarianMTModel, MarianTokenizer
import subprocess
import os
# Load models
def load_models():
    """Load the Whisper model and the MarianMT translation models/tokenizers.

    Populates three module-level globals:

    * ``whisper_model`` -- the Whisper "base" speech-recognition model.
    * ``translation_models`` -- MarianMT models keyed by source-language code.
    * ``translation_tokenizers`` -- the matching tokenizers, same keys.
    """
    global whisper_model, translation_models, translation_tokenizers

    whisper_model = whisper.load_model("base")  # Whisper model

    # One checkpoint per source language. Model and tokenizer are both loaded
    # from this table so they always come from the same checkpoint; deriving
    # the tokenizer name as ``opus-mt-{lang}-en`` would ask for
    # ``opus-mt-en-en`` for the "en" key, which does not match the
    # ``opus-mt-en-es`` model used for it.
    checkpoints = {
        "en": "Helsinki-NLP/opus-mt-en-es",
        "es": "Helsinki-NLP/opus-mt-es-en",
        "fr": "Helsinki-NLP/opus-mt-fr-en",
        "ar": "Helsinki-NLP/opus-mt-ar-en",
    }
    translation_models = {
        lang: MarianMTModel.from_pretrained(checkpoint)
        for lang, checkpoint in checkpoints.items()
    }
    translation_tokenizers = {
        lang: MarianTokenizer.from_pretrained(checkpoint)
        for lang, checkpoint in checkpoints.items()
    }
# Load every model once at import time; the handlers below read the
# module-level globals that load_models() sets.
load_models()
# Transcribe function
def transcribe_audio(file, language="en"):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        file: Path to the audio file, or an object that exposes the path
            through a string ``name`` attribute (e.g. a temp-file wrapper).
            ``None`` or an empty string means nothing was uploaded.
        language: Language code forwarded to ``whisper_model.transcribe``.

    Returns:
        The transcribed text, or a string starting with ``"Error: "`` when
        no audio was supplied or transcription failed.
    """
    # Nothing uploaded: report it clearly instead of letting Whisper fail
    # with an opaque message.
    if file is None or (isinstance(file, str) and not file):
        return "Error: No audio file provided."

    # Unwrap file-like upload objects; plain paths (and anything else Whisper
    # accepts directly) are passed through unchanged.
    path_attr = getattr(file, "name", None)
    audio = path_attr if isinstance(path_attr, str) else file

    try:
        result = whisper_model.transcribe(audio, language=language)
        return result["text"]
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing.
        return f"Error: {str(e)}"
# Translate function
def translate_text(text, source_lang, target_lang="en"):
    """Translate *text* with the MarianMT model registered for *source_lang*.

    Args:
        text: Text to translate. Empty or whitespace-only text yields ``""``.
        source_lang: Key into the module-level ``translation_models`` /
            ``translation_tokenizers`` dictionaries.
        target_lang: Only ``"en"`` is accepted.

    Returns:
        The translated text, ``""`` for empty input, ``"Unsupported
        language."`` for an unknown language pair, or a string starting with
        ``"Error: "`` if translation failed.

    NOTE: the "en" entry configured in ``load_models`` is an English->Spanish
    checkpoint, so English input produces Spanish output even though
    ``target_lang`` is "en".
    """
    try:
        if target_lang != "en":
            return "Unsupported language."
        # Nothing to translate: skip the model call entirely.
        if not text or not text.strip():
            return ""
        if source_lang not in translation_models:
            return "Unsupported language."

        tokenizer = translation_tokenizers[source_lang]
        model = translation_models[source_lang]

        # truncation=True caps the input at the model's maximum length so
        # over-long text is translated partially instead of raising.
        inputs = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        translated_tokens = model.generate(**inputs)
        return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing.
        return f"Error: {str(e)}"
# Text-to-Speech function
def text_to_speech(text, speaker="male", speed="normal"):
    """Synthesize *text* to a WAV file by running the ``tts`` command-line tool.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected.
        speaker: Accepted for interface compatibility; currently unused.
        speed: Accepted for interface compatibility; currently unused.

    Returns:
        Path of the generated WAV file, or a string starting with
        ``"Error: "`` when no text was given or synthesis failed.
    """
    import tempfile

    if not text or not text.strip():
        return "Error: No text provided."

    output_file = None
    try:
        # A unique file per call: a fixed "output.wav" would be overwritten
        # when two requests are served at the same time.
        fd, output_file = tempfile.mkstemp(prefix="tts_", suffix=".wav")
        os.close(fd)

        # Argument list (no shell), with the text kept inside a single
        # "--text=..." argument so it cannot be parsed as extra options.
        tts_command = [
            "tts",
            f"--text={text}",
            "--model_name=tts_models/en/ljspeech/tacotron2-DCA",
            f"--out_path={output_file}",
        ]
        subprocess.run(tts_command, check=True)
        return output_file
    except Exception as e:
        # Do not leave an empty placeholder file behind on failure.
        if output_file is not None:
            try:
                os.remove(output_file)
            except OSError:
                pass
        return f"Error: {str(e)}"
# Gradio Interface
def tts_interface(text):
    """Gradio callback: synthesize *text* and return the audio path or None.

    ``text_to_speech`` reports failures as an "Error: ..." string; such a
    value does not name an existing file, so it is mapped to ``None``.
    """
    result = text_to_speech(text)
    if not isinstance(result, str):
        return None
    if not os.path.exists(result):
        return None
    return result
# Build the three-section UI: transcription, translation and text-to-speech.
with gr.Blocks() as demo:
    gr.Markdown("### Audio Transcription, Translation, and TTS App")

    # Transcription section
    with gr.Row():
        with gr.Column():
            # "filepath" hands the callback a path string, which is what
            # whisper's transcribe() expects; "file" is not an accepted
            # type in current Gradio releases.
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            lang_input = gr.Dropdown(["en", "es", "fr", "ar"], label="Language", value="en")
            transcribe_btn = gr.Button("Transcribe")
        transcription_output = gr.Textbox(label="Transcription Output")
    transcribe_btn.click(transcribe_audio, inputs=[audio_input, lang_input], outputs=transcription_output)

    # Translation section
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=3)
            source_lang = gr.Dropdown(["en", "es", "fr", "ar"], label="Source Language", value="en")
            translate_btn = gr.Button("Translate")
        translation_output = gr.Textbox(label="Translation Output")
    translate_btn.click(translate_text, inputs=[text_input, source_lang], outputs=translation_output)

    # TTS section
    with gr.Row():
        with gr.Column():
            tts_input = gr.Textbox(label="Text for TTS", lines=2)
            tts_btn = gr.Button("Generate Audio")
        tts_output = gr.Audio(label="Generated Audio")
    tts_btn.click(tts_interface, inputs=tts_input, outputs=tts_output)

# Launch Gradio App
demo.launch()
|