File size: 3,690 Bytes
6bea046
d30be26
 
 
 
 
 
 
08cc981
d30be26
 
 
 
 
 
 
 
 
 
 
 
6bea046
d30be26
6bea046
 
d30be26
6bea046
d30be26
6bea046
d30be26
6bea046
d30be26
6bea046
 
d30be26
 
6bea046
d30be26
 
 
 
 
 
6bea046
d30be26
6bea046
d30be26
6bea046
 
d30be26
 
 
 
 
 
 
 
6bea046
 
 
 
d30be26
6bea046
 
 
 
d30be26
6bea046
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d30be26
6bea046
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import gradio as gr
import whisper
from transformers import MarianMTModel, MarianTokenizer
import subprocess
import os

# Load models
def load_models():
    """Load the Whisper model and the MarianMT translation models/tokenizers.

    Populates three module-level globals:

    * ``whisper_model`` - the Whisper "base" speech-recognition model.
    * ``translation_models`` - maps a source-language code to its MarianMT model.
    * ``translation_tokenizers`` - maps the same codes to the matching tokenizer.
    """
    global whisper_model, translation_models, translation_tokenizers
    whisper_model = whisper.load_model("base")

    # One checkpoint name per source language. The model and its tokenizer
    # must come from the SAME checkpoint: previously tokenizer names were
    # derived as "opus-mt-{lang}-en", which for "en" did not match the
    # "opus-mt-en-es" model loaded for that key.
    checkpoints = {
        "en": "Helsinki-NLP/opus-mt-en-es",
        "es": "Helsinki-NLP/opus-mt-es-en",
        "fr": "Helsinki-NLP/opus-mt-fr-en",
        "ar": "Helsinki-NLP/opus-mt-ar-en",
    }
    translation_models = {
        lang: MarianMTModel.from_pretrained(name)
        for lang, name in checkpoints.items()
    }
    translation_tokenizers = {
        lang: MarianTokenizer.from_pretrained(name)
        for lang, name in checkpoints.items()
    }

# Load every model once at import time; the handlers below read the
# module-level globals that load_models() assigns.
load_models()

# Transcribe function
def transcribe_audio(file, language="en"):
    """Transcribe an audio file with the global Whisper model.

    Args:
        file: Path to the audio file, or a file-like object whose ``name``
            attribute is that path. ``None`` means nothing was uploaded.
        language: Language code passed to Whisper's ``transcribe``.

    Returns:
        The transcribed text, or a string starting with ``"Error: "`` when
        no file was supplied or transcription failed.
    """
    if file is None:
        # Nothing uploaded: report it clearly instead of surfacing an
        # internal Whisper error.
        return "Error: No audio file provided."

    # A file-like upload (an open temp file) carries its path in `.name`;
    # unwrap it so Whisper receives a path. The `read` check keeps
    # pathlib.Path and array/tensor inputs untouched.
    if hasattr(file, "read") and isinstance(getattr(file, "name", None), str):
        file = file.name

    try:
        result = whisper_model.transcribe(file, language=language)
        return result["text"]
    except Exception as e:
        # UI boundary: show the failure in the output box rather than crash.
        return f"Error: {str(e)}"

# Translate function
def translate_text(text, source_lang, target_lang="en"):
    """Translate *text* using the MarianMT model registered for *source_lang*.

    Args:
        text: Text to translate.
        source_lang: Key into the global ``translation_models`` /
            ``translation_tokenizers`` dictionaries.
        target_lang: Only ``"en"`` is accepted.

    Returns:
        The translated text; ``""`` for blank input;
        ``"Unsupported language."`` for an unknown source or non-"en"
        target; or a string starting with ``"Error: "`` on failure.
    """
    # NOTE(review): the "en" entry built in load_models is an en->es
    # checkpoint, so source_lang="en" yields Spanish even though
    # target_lang is "en" - confirm this is intended.
    if target_lang != "en":
        return "Unsupported language."

    # Nothing to translate: skip the model entirely.
    if not text or not text.strip():
        return ""

    try:
        if source_lang not in translation_models:
            return "Unsupported language."

        tokenizer = translation_tokenizers[source_lang]
        model = translation_models[source_lang]
        # truncation=True caps the input at the tokenizer's maximum length
        # so over-long text is cut instead of making generation fail.
        inputs = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        translated_tokens = model.generate(**inputs)
        return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: show the failure in the output box rather than crash.
        return f"Error: {str(e)}"

# Text-to-Speech function
def text_to_speech(text, speaker="male", speed="normal"):
    """Synthesize *text* to a WAV file by running the ``tts`` command.

    Args:
        text: Text to speak.
        speaker: Currently unused; kept for interface compatibility.
        speed: Currently unused; kept for interface compatibility.

    Returns:
        Path of the generated WAV file, or a string starting with
        ``"Error: "`` when the text is blank or synthesis failed. The
        caller owns the returned file and may delete it when done.
    """
    import tempfile

    if not text or not text.strip():
        # Avoid spawning the synthesizer for nothing.
        return "Error: No text provided."

    output_file = None
    try:
        # A unique file per call: a fixed "output.wav" would be overwritten
        # when two requests run at the same time.
        fd, output_file = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        tts_command = [
            "tts",
            f"--text={text}",
            "--model_name=tts_models/en/ljspeech/tacotron2-DCA",
            f"--out_path={output_file}",
        ]
        # Argument list (no shell), so the text is never shell-interpreted.
        subprocess.run(tts_command, check=True)
        return output_file
    except Exception as e:
        # Drop the empty placeholder so failed runs leave nothing behind.
        if output_file is not None:
            try:
                os.remove(output_file)
            except OSError:
                pass
        return f"Error: {str(e)}"

# Gradio Interface
def tts_interface(text):
    """Gradio callback: synthesize *text* and return the audio file path.

    Returns ``None`` when ``text_to_speech`` did not return a string that
    names an existing file (for example, when it returned an error message).
    """
    result = text_to_speech(text)
    if not isinstance(result, str):
        return None
    if not os.path.exists(result):
        return None
    return result

# Build the three-section UI: transcription, translation, text-to-speech.
with gr.Blocks() as demo:
    gr.Markdown("### Audio Transcription, Translation, and TTS App")

    # Transcription section
    with gr.Row():
        with gr.Column():
            # "filepath" hands the callback a path string, which is what
            # Whisper's transcribe() takes; "file" is not a supported
            # value for gr.Audio in current Gradio releases.
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            lang_input = gr.Dropdown(["en", "es", "fr", "ar"], label="Language", value="en")
            transcribe_btn = gr.Button("Transcribe")
        transcription_output = gr.Textbox(label="Transcription Output")

    transcribe_btn.click(transcribe_audio, inputs=[audio_input, lang_input], outputs=transcription_output)

    # Translation section
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=3)
            source_lang = gr.Dropdown(["en", "es", "fr", "ar"], label="Source Language", value="en")
            translate_btn = gr.Button("Translate")
        translation_output = gr.Textbox(label="Translation Output")

    # Only text and source language are wired in, so translate_text runs
    # with its default target_lang.
    translate_btn.click(translate_text, inputs=[text_input, source_lang], outputs=translation_output)

    # TTS section
    with gr.Row():
        with gr.Column():
            tts_input = gr.Textbox(label="Text for TTS", lines=2)
            tts_btn = gr.Button("Generate Audio")
        tts_output = gr.Audio(label="Generated Audio")

    # tts_interface returns a file path, or None when synthesis failed.
    tts_btn.click(tts_interface, inputs=tts_input, outputs=tts_output)

# Launch Gradio App
demo.launch()