"""Gradio front-end for the MMS zero-shot ASR demo.

Importing this module creates/verifies the lexicon temp directory and
exports its path through the ``TEMP_LEXICON_DIR`` environment variable.
Running it as a script launches the Gradio app.
"""
import logging
import os
from pathlib import Path

import gradio as gr

from zeroshot import (
    process,
    WORD_SCORE_DEFAULT_IF_NOLM,
)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Set specific directory path
TEMP_DIR = Path("D:/Ngen/bot/temp_lexicon")


def ensure_temp_dir() -> str:
    """Create ``TEMP_DIR`` if needed and verify that it is writable.

    Returns:
        The directory path as a string.

    Raises:
        OSError: If the directory cannot be created, or a probe file cannot
            be created and removed inside it. The failure is logged once
            before being re-raised.
    """
    try:
        TEMP_DIR.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.error(f"Failed to create or access directory {TEMP_DIR}: {e}")
        raise
    logger.info(f"Created or verified temp directory at {TEMP_DIR}")

    # Probe write permissions by creating and immediately removing a file.
    probe = TEMP_DIR / 'test_write'
    try:
        probe.touch()
        probe.unlink()
    except OSError as e:
        logger.error(f"Failed to write to directory {TEMP_DIR}: {e}")
        raise
    logger.info("Successfully verified write permissions")

    return str(TEMP_DIR)


# Create temporary directory at startup
TEMP_PATH = ensure_temp_dir()
os.environ['TEMP_LEXICON_DIR'] = TEMP_PATH
logger.info(f"Set TEMP_LEXICON_DIR environment variable to {TEMP_PATH}")


def process_wrapper(audio, words_file, wscore, wscore_usedefault, reference):
    """Run ``process`` with the language-model options fixed to "off".

    ``process`` is consumed as a generator of ``(transcription, logs)``
    pairs and only the last pair is returned.

    NOTE(review): this assumes every yielded pair carries the cumulative
    state so far, so that concatenating all of them would duplicate text.
    ``process`` is not visible here -- confirm against ``zeroshot.process``.

    Args:
        audio: Value of the audio input component.
        words_file: Value of the "Text Data" file component.
        wscore: Word insertion score chosen on the slider.
        wscore_usedefault: Whether the default word insertion score is used.
        reference: Value of the hidden reference-transcript textbox.

    Returns:
        The last ``(transcription, logs)`` pair yielded by ``process``, or
        ``("", "")`` if it yields nothing.
    """
    generator = process(
        audio_data=audio,
        words_file=words_file,
        lm_path=None,
        wscore=wscore,
        lmscore=None,
        wscore_usedefault=wscore_usedefault,
        lmscore_usedefault=True,
        autolm=False,
        reference=reference,
    )

    # Keep only the last result produced by the generator.
    transcription, logs = "", ""
    for transcription, logs in generator:
        pass

    return transcription, logs


def create_gradio_interface():
    """Create and configure the Gradio interface.

    Builds a ``gr.Blocks`` app with an input column (audio, text data,
    advanced decoding settings, submit button), an output column
    (transcript and logs) and two sets of clickable examples.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    # The same reference sentence is shared by all English examples.
    english_reference = (
        "This is going to look at the code that we have in our configuration "
        "that we've already exported and compare it to our database, and we "
        "want to import"
    )

    with gr.Blocks(css="style.css") as demo:
        # Escaped newlines: a plain quoted string may not span several lines.
        gr.Markdown("\n\nMMS Zero-shot ASR Demo\n\n")
        gr.HTML(
            "\n"
            "The demo works on input audio in any language, as long as you provide a list of words or sentences for that language.\n"
            "We recommend having a minimum of 10000 sentences in the textfile to achieve a good performance.\n"
        )

        with gr.Row():
            with gr.Column():
                # Audio input section
                audio = gr.Audio(
                    label="Audio Input\n(use microphone or upload a file)",
                    type="filepath"
                )
                with gr.Row():
                    words_file = gr.File(label="Text Data")

                # Advanced settings section
                with gr.Accordion("Advanced Settings", open=False):
                    gr.Markdown(
                        "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                    )
                    with gr.Row():
                        with gr.Column():
                            wscore_usedefault = gr.Checkbox(
                                label="Use Default Word Insertion Score",
                                value=True
                            )
                            wscore = gr.Slider(
                                minimum=-10.0,
                                maximum=10.0,
                                value=WORD_SCORE_DEFAULT_IF_NOLM,
                                step=0.1,
                                interactive=False,
                                label="Word Insertion Score",
                            )

                btn = gr.Button("Submit", elem_id="submit")

                # Slider update function: the slider is editable only while
                # the "use default" checkbox is unticked.
                @gr.on(
                    inputs=[wscore_usedefault],
                    outputs=[wscore],
                )
                def update_slider(ws):
                    return gr.Slider(
                        minimum=-10.0,
                        maximum=10.0,
                        value=WORD_SCORE_DEFAULT_IF_NOLM,
                        step=0.1,
                        interactive=not ws,
                        label="Word Insertion Score",
                    )

            # Output section
            with gr.Column():
                text = gr.Textbox(label="Transcript")
                with gr.Accordion("Logs", open=False):
                    logs = gr.Textbox(show_label=False)

        # Hidden field; it is filled in by the examples below and passed
        # through to process_wrapper.
        reference = gr.Textbox(label="Reference Transcript", visible=False)

        # Process button click
        btn.click(
            fn=process_wrapper,
            inputs=[
                audio,
                words_file,
                wscore,
                wscore_usedefault,
                reference,
            ],
            outputs=[text, logs],
        )

        # Example inputs
        gr.Examples(
            examples=[
                [
                    "upload/english/english.mp3",
                    "upload/english/c4_10k_sentences.txt",
                    english_reference,
                ],
                [
                    "upload/english/english.mp3",
                    "upload/english/c4_5k_sentences.txt",
                    english_reference,
                ],
                [
                    "upload/english/english.mp3",
                    "upload/english/gutenberg_27045.txt",
                    english_reference,
                ],
            ],
            inputs=[audio, words_file, reference],
            label="English",
        )

        gr.Examples(
            examples=[
                [
                    "upload/ligurian/ligurian_1.mp3",
                    "upload/ligurian/zenamt_10k_sentences.txt",
                    "I mæ colleghi m'an domandou d'aggiuttâli à fâ unna preuva co-o zeneise pe vedde s'o fonçioña.",
                ],
                [
                    "upload/ligurian/ligurian_2.mp3",
                    "upload/ligurian/zenamt_10k_sentences.txt",
                    "Staseia vaggo à çenâ con mæ moggê e doî amixi che de chì à quarche settemaña faian stramuo feua stato.",
                ],
                [
                    "upload/ligurian/ligurian_3.mp3",
                    "upload/ligurian/zenamt_5k_sentences.txt",
                    "Pe inandiâ o pesto ghe veu o baxaicò, i pigneu, l'euio, o formaggio, l'aggio e a sâ.",
                ],
            ],
            inputs=[audio, words_file, reference],
            label="Ligurian",
        )

    return demo


def main():
    """Build the Gradio interface and serve it on 0.0.0.0:7860.

    Raises:
        Exception: Any error raised while building or launching the app is
            logged and re-raised.
    """
    try:
        # Create and launch Gradio interface
        demo = create_gradio_interface()

        # Launch with specific host and port
        demo.launch(
            server_name='0.0.0.0',
            server_port=7860,
            show_error=True
        )
    except Exception as e:
        logger.error(f"Failed to launch Gradio interface: {e}")
        raise


if __name__ == "__main__":
    main()