# File size: 8,189 Bytes
#
# 6f03d40

import gradio as gr
from zeroshot import (
    process,
    WORD_SCORE_DEFAULT_IF_NOLM,
)
import os
import logging
from pathlib import Path

# Logging setup: request INFO and above, stamping each record with the time,
# the logger name and the severity.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Directory used for temporary lexicon files. A non-empty TEMP_LEXICON_DIR
# environment variable overrides the built-in default, so the app is not tied
# to one machine's drive layout; when it is unset or empty the original path
# is used unchanged.
TEMP_DIR = Path(os.environ.get("TEMP_LEXICON_DIR") or "D:/Ngen/bot/temp_lexicon")

def ensure_temp_dir():
    """Create the lexicon temp directory and verify that it is writable.

    ``TEMP_DIR`` is created together with any missing parents, then write
    access is probed by creating and deleting a marker file named
    ``test_write`` inside it.

    Returns:
        str: ``TEMP_DIR`` converted to a string.

    Raises:
        Exception: Whatever ``mkdir``, ``touch`` or ``unlink`` raises
            (typically an ``OSError`` subclass). The error is logged exactly
            once, with the message that matches the failing step, and then
            re-raised unchanged.
    """
    # Step 1: make sure the directory exists.
    try:
        TEMP_DIR.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Failed to create or access directory {TEMP_DIR}: {e}")
        raise
    logger.info(f"Created or verified temp directory at {TEMP_DIR}")

    # Step 2: probe write permissions. This is handled separately from step 1
    # so a write failure is not reported a second time as a creation failure.
    test_file = TEMP_DIR / 'test_write'
    try:
        test_file.touch()
        test_file.unlink()  # Remove the probe file again
    except Exception as e:
        logger.error(f"Failed to write to directory {TEMP_DIR}: {e}")
        raise
    logger.info("Successfully verified write permissions")

    return str(TEMP_DIR)

# Prepare the lexicon directory once, at import time, then publish its
# location through the process environment.
TEMP_PATH = ensure_temp_dir()
os.environ.update(TEMP_LEXICON_DIR=TEMP_PATH)
logger.info(f"Set TEMP_LEXICON_DIR environment variable to {TEMP_PATH}")

def process_wrapper(audio, words_file, wscore, wscore_usedefault, reference):
    """Run ``process`` with fixed decoding options and return its final result.

    No language model is used: ``lm_path`` and ``lmscore`` are passed as
    ``None``, ``lmscore_usedefault`` as ``True`` and ``autolm`` as ``False``.
    The remaining arguments are forwarded unchanged.

    Args:
        audio: Audio input value forwarded as ``audio_data``.
        words_file: Text-data file value forwarded as ``words_file``.
        wscore: Word insertion score forwarded as ``wscore``.
        wscore_usedefault: Flag forwarded as ``wscore_usedefault``.
        reference: Reference transcript forwarded as ``reference``.

    Returns:
        tuple: ``(transcription, logs)`` taken from the last pair yielded by
        ``process``, or ``("", "")`` if it yields nothing.
    """
    generator = process(
        audio_data=audio,
        words_file=words_file,
        lm_path=None,
        wscore=wscore,
        lmscore=None,
        wscore_usedefault=wscore_usedefault,
        lmscore_usedefault=True,
        autolm=False,
        reference=reference
    )

    # Keep only the last yielded pair, as the original comment intended.
    # NOTE(review): this assumes every pair yielded by `process` is a complete
    # snapshot (full transcript so far, full log so far) -- confirm against
    # zeroshot.process. Concatenating the pairs would repeat earlier text.
    transcription, logs = "", ""
    for transcription, logs in generator:
        pass

    return transcription, logs

def create_gradio_interface():
    """Build the Gradio Blocks UI for the zero-shot ASR demo.

    Layout, top to bottom:

    * a title and a short usage note;
    * one row with two columns: inputs on the left (audio, text-data file,
      advanced decoding settings, submit button) and outputs on the right
      (transcript and a collapsible log box);
    * a hidden reference-transcript box that is filled by the examples;
    * the English and Ligurian example tables.

    The example tables and the hidden box are placed at the Blocks level,
    below the row, so they do not become extra columns next to the
    input/output columns.

    Returns:
        The assembled ``gr.Blocks`` object (not launched).
    """
    # Shared by the initial slider and by every update returned from
    # update_slider, so the two definitions cannot drift apart.
    slider_config = dict(
        minimum=-10.0,
        maximum=10.0,
        value=WORD_SCORE_DEFAULT_IF_NOLM,
        step=0.1,
        label="Word Insertion Score",
    )

    # All three English examples use the same audio clip and reference text.
    english_audio = "upload/english/english.mp3"
    english_reference = "This is going to look at the code that we have in our configuration that we've already exported and compare it to our database, and we want to import"

    with gr.Blocks(css="style.css") as demo:
        gr.Markdown(
            "<p align='center' style='font-size: 20px;'>MMS Zero-shot ASR Demo</p>"
        )
        gr.HTML(
            """<center>The demo works on input audio in any language, as long as you provide a list of words or sentences for that language.<br>We recommend having a minimum of 10000 sentences in the textfile to achieve a good performance.</center>"""
        )

        with gr.Row():
            with gr.Column():
                # Audio input section
                audio = gr.Audio(
                    label="Audio Input\n(use microphone or upload a file)",
                    type="filepath"
                )

                with gr.Row():
                    words_file = gr.File(label="Text Data")

                # Advanced settings section
                with gr.Accordion("Advanced Settings", open=False):
                    gr.Markdown(
                        "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                    )
                    with gr.Row():
                        with gr.Column():
                            wscore_usedefault = gr.Checkbox(
                                label="Use Default Word Insertion Score",
                                value=True
                            )
                            # Starts locked because the checkbox above
                            # starts checked.
                            wscore = gr.Slider(interactive=False, **slider_config)

                btn = gr.Button("Submit", elem_id="submit")

                # No explicit triggers are passed; per Gradio's documented
                # default for gr.on the handler then follows changes of its
                # inputs (here: the checkbox).
                @gr.on(
                    inputs=[wscore_usedefault],
                    outputs=[wscore],
                )
                def update_slider(ws):
                    # The slider is editable only while "use default" is
                    # unchecked; its value is reset to the default each time.
                    return gr.Slider(interactive=not ws, **slider_config)

            # Output section
            with gr.Column():
                text = gr.Textbox(label="Transcript")
                with gr.Accordion("Logs", open=False):
                    logs = gr.Textbox(show_label=False)

        # Hidden field: populated by the examples and forwarded to the handler.
        reference = gr.Textbox(label="Reference Transcript", visible=False)

        # Process button click
        btn.click(
            fn=process_wrapper,
            inputs=[
                audio,
                words_file,
                wscore,
                wscore_usedefault,
                reference,
            ],
            outputs=[text, logs],
        )

        # Example inputs
        gr.Examples(
            examples=[
                [
                    english_audio,
                    "upload/english/c4_10k_sentences.txt",
                    english_reference,
                ],
                [
                    english_audio,
                    "upload/english/c4_5k_sentences.txt",
                    english_reference,
                ],
                [
                    english_audio,
                    "upload/english/gutenberg_27045.txt",
                    english_reference,
                ],
            ],
            inputs=[audio, words_file, reference],
            label="English",
        )

        gr.Examples(
            examples=[
                [
                    "upload/ligurian/ligurian_1.mp3",
                    "upload/ligurian/zenamt_10k_sentences.txt",
                    "I mæ colleghi m'an domandou d'aggiuttâli à fâ unna preuva co-o zeneise pe vedde s'o fonçioña.",
                ],
                [
                    "upload/ligurian/ligurian_2.mp3",
                    "upload/ligurian/zenamt_10k_sentences.txt",
                    "Staseia vaggo à çenâ con mæ moggê e doî amixi che de chì à quarche settemaña faian stramuo feua stato.",
                ],
                [
                    "upload/ligurian/ligurian_3.mp3",
                    "upload/ligurian/zenamt_5k_sentences.txt",
                    "Pe inandiâ o pesto ghe veu o baxaicò, i pigneu, l'euio, o formaggio, l'aggio e a sâ.",
                ],
            ],
            inputs=[audio, words_file, reference],
            label="Ligurian",
        )

    return demo

def main(server_name='0.0.0.0', server_port=7860):
    """Build the Gradio interface and serve it.

    Args:
        server_name: Host or interface address passed to ``demo.launch``.
            The default ``'0.0.0.0'`` binds to all network interfaces.
        server_port: TCP port passed to ``demo.launch``.

    Raises:
        Exception: Any error raised while building or launching the
            interface is logged and then re-raised unchanged.
    """
    try:
        # Create and launch Gradio interface
        demo = create_gradio_interface()

        # show_error=True is passed so handler errors are shown in the UI.
        demo.launch(
            server_name=server_name,
            server_port=server_port,
            show_error=True
        )
    except Exception as e:
        logger.error(f"Failed to launch Gradio interface: {e}")
        raise


if __name__ == "__main__":
    main()