import gradio as gr
from zeroshot import (
process,
WORD_SCORE_DEFAULT_IF_NOLM,
)
import os
import logging
from pathlib import Path
# Root logging setup: INFO and above, each record stamped with time,
# logger name and severity.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Fixed working directory used for temporary lexicon files.
TEMP_DIR = Path("D:/Ngen/bot/temp_lexicon")
def ensure_temp_dir() -> str:
    """Create the temp lexicon directory and verify that it is writable.

    ``TEMP_DIR`` is created (with any missing parents) if it does not exist,
    then write access is probed by creating and deleting a scratch file
    inside it.

    Returns:
        The path of ``TEMP_DIR`` as a string.

    Raises:
        Exception: Whatever ``mkdir``, ``touch`` or ``unlink`` raised. The
            failure is logged exactly once, with a message naming the step
            that failed, and then re-raised unchanged.
    """
    # Step 1: make sure the directory exists.
    try:
        TEMP_DIR.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Failed to create or access directory {TEMP_DIR}: {e}")
        raise
    logger.info(f"Created or verified temp directory at {TEMP_DIR}")

    # Step 2: probe write permission. This is guarded separately from step 1
    # so a write failure is not reported a second time as a directory
    # creation failure.
    test_file = TEMP_DIR / 'test_write'
    try:
        test_file.touch()
        test_file.unlink()  # Remove the probe file again
    except Exception as e:
        logger.error(f"Failed to write to directory {TEMP_DIR}: {e}")
        raise
    logger.info("Successfully verified write permissions")

    return str(TEMP_DIR)
# Prepare the working directory once, at import time, and publish its
# location through the environment.
# NOTE(review): `zeroshot` is imported before this variable is set; if it
# reads TEMP_LEXICON_DIR at import time it will not see this value - confirm.
TEMP_PATH = ensure_temp_dir()
os.environ["TEMP_LEXICON_DIR"] = TEMP_PATH
logger.info(f"Set TEMP_LEXICON_DIR environment variable to {TEMP_PATH}")
def process_wrapper(audio, words_file, wscore, wscore_usedefault, reference):
    """Run ``process`` with the language-model options fixed and return its final output.

    The language model is disabled (``lm_path=None``, ``lmscore=None``,
    ``lmscore_usedefault=True``, ``autolm=False``); every other option is
    forwarded from the UI.

    Args:
        audio: Audio input, forwarded as ``audio_data``.
        words_file: Text data file, forwarded unchanged.
        wscore: Word insertion score, forwarded unchanged.
        wscore_usedefault: Whether the default word insertion score is used,
            forwarded unchanged.
        reference: Reference transcript, forwarded unchanged.

    Returns:
        A ``(transcription, logs)`` tuple holding the last pair produced by
        ``process``, or two empty strings if it produced nothing.
    """
    generator = process(
        audio_data=audio,
        words_file=words_file,
        lm_path=None,
        wscore=wscore,
        lmscore=None,
        wscore_usedefault=wscore_usedefault,
        lmscore_usedefault=True,
        autolm=False,
        reference=reference,
    )

    # Keep only the last result from the generator. Concatenating every
    # yielded pair would repeat earlier text whenever a later yield already
    # contains it.
    # NOTE(review): assumes each yielded pair is a complete snapshot that
    # supersedes the previous one - confirm against zeroshot.process.
    transcription, logs = "", ""
    for transcription, logs in generator:
        pass
    return transcription, logs
def create_gradio_interface():
    """Create and configure the Gradio interface for the zero-shot ASR demo.

    The page has an input column (audio, text data file, an "Advanced
    Settings" accordion with the word insertion score controls, and the
    submit button) and an output column (transcript, collapsible logs, and a
    hidden reference-transcript box filled in by the examples). Clicking the
    submit button calls ``process_wrapper``.

    Returns:
        The assembled ``gr.Blocks`` app; it is not launched here.
    """
    # Slider settings shared by the initial widget and the refresh callback,
    # so the two definitions cannot drift apart.
    wscore_slider_config = dict(
        minimum=-10.0,
        maximum=10.0,
        value=WORD_SCORE_DEFAULT_IF_NOLM,
        step=0.1,
        label="Word Insertion Score",
    )

    # Reference transcript shared by all English examples.
    english_reference = (
        "This is going to look at the code that we have in our configuration "
        "that we've already exported and compare it to our database, and we "
        "want to import"
    )

    with gr.Blocks(css="style.css") as demo:
        # The title must be a single valid string literal; a plain quoted
        # string cannot span several source lines.
        gr.Markdown("MMS Zero-shot ASR Demo")
        gr.HTML(
            "The demo works on input audio in any language, as long as you "
            "provide a list of words or sentences for that language.\n"
            "We recommend having a minimum of 10000 sentences in the "
            "textfile to achieve a good performance."
        )

        with gr.Row():
            with gr.Column():
                # Audio input section
                audio = gr.Audio(
                    label="Audio Input\n(use microphone or upload a file)",
                    type="filepath",
                )
                with gr.Row():
                    words_file = gr.File(label="Text Data")

                # Advanced settings section
                with gr.Accordion("Advanced Settings", open=False):
                    gr.Markdown(
                        "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                    )
                    with gr.Row():
                        with gr.Column():
                            wscore_usedefault = gr.Checkbox(
                                label="Use Default Word Insertion Score",
                                value=True,
                            )
                            # Locked while the "use default" box is ticked.
                            wscore = gr.Slider(
                                interactive=False,
                                **wscore_slider_config,
                            )

                btn = gr.Button("Submit", elem_id="submit")

                # Slider update function. No explicit triggers are passed to
                # gr.on; see the Gradio documentation for the default
                # (reacts to changes of the listed inputs).
                @gr.on(
                    inputs=[wscore_usedefault],
                    outputs=[wscore],
                )
                def update_slider(ws):
                    # Reset the slider to the default value and make it
                    # editable only when "use default" is unticked.
                    return gr.Slider(
                        interactive=not ws,
                        **wscore_slider_config,
                    )

            # Output section
            with gr.Column():
                text = gr.Textbox(label="Transcript")
                with gr.Accordion("Logs", open=False):
                    logs = gr.Textbox(show_label=False)
                reference = gr.Textbox(
                    label="Reference Transcript", visible=False
                )

        # Process button click
        btn.click(
            fn=process_wrapper,
            inputs=[
                audio,
                words_file,
                wscore,
                wscore_usedefault,
                reference,
            ],
            outputs=[text, logs],
        )

        # Example inputs
        gr.Examples(
            examples=[
                [
                    "upload/english/english.mp3",
                    "upload/english/c4_10k_sentences.txt",
                    english_reference,
                ],
                [
                    "upload/english/english.mp3",
                    "upload/english/c4_5k_sentences.txt",
                    english_reference,
                ],
                [
                    "upload/english/english.mp3",
                    "upload/english/gutenberg_27045.txt",
                    english_reference,
                ],
            ],
            inputs=[audio, words_file, reference],
            label="English",
        )
        gr.Examples(
            examples=[
                [
                    "upload/ligurian/ligurian_1.mp3",
                    "upload/ligurian/zenamt_10k_sentences.txt",
                    "I mæ colleghi m'an domandou d'aggiuttâli à fâ unna preuva co-o zeneise pe vedde s'o fonçioña.",
                ],
                [
                    "upload/ligurian/ligurian_2.mp3",
                    "upload/ligurian/zenamt_10k_sentences.txt",
                    "Staseia vaggo à çenâ con mæ moggê e doî amixi che de chì à quarche settemaña faian stramuo feua stato.",
                ],
                [
                    "upload/ligurian/ligurian_3.mp3",
                    "upload/ligurian/zenamt_5k_sentences.txt",
                    "Pe inandiâ o pesto ghe veu o baxaicò, i pigneu, l'euio, o formaggio, l'aggio e a sâ.",
                ],
            ],
            inputs=[audio, words_file, reference],
            label="Ligurian",
        )

    return demo
def main():
    """Build the Gradio demo and serve it.

    The app is launched on host ``0.0.0.0``, port 7860, with ``show_error``
    enabled. Any exception raised while building or launching is logged and
    then re-raised.
    """
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    try:
        app = create_gradio_interface()
        app.launch(**launch_options)
    except Exception as exc:
        logger.error(f"Failed to launch Gradio interface: {exc}")
        raise


if __name__ == "__main__":
    main()