|
import gradio as gr
|
|
from zeroshot import (
|
|
process,
|
|
WORD_SCORE_DEFAULT_IF_NOLM,
|
|
)
|
|
import os
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
|
|
# Configure the root logger once at import time: INFO level, with the
# timestamp, logger name and severity in front of every message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Directory that ensure_temp_dir() creates and probes for write access; its
# string form is later exported through the TEMP_LEXICON_DIR environment variable.
# NOTE(review): hard-coded absolute Windows-style path -- confirm this is intended
# for every machine the demo runs on.
TEMP_DIR = Path("D:/Ngen/bot/temp_lexicon")
|
|
|
|
def ensure_temp_dir():
    """Create ``TEMP_DIR`` if needed and verify that it is writable.

    The check runs in two independent steps so that each failure is logged
    exactly once, with the message that matches what actually went wrong:

    1. create the directory (including missing parents);
    2. create and immediately remove a probe file inside it.

    Returns:
        str: ``TEMP_DIR`` converted to a string.

    Raises:
        Exception: whatever the directory creation or the write probe raised;
            the error is logged and then re-raised unchanged.
    """
    # Step 1: make sure the directory exists.
    try:
        TEMP_DIR.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Failed to create or access directory {TEMP_DIR}: {e}")
        raise
    logger.info(f"Created or verified temp directory at {TEMP_DIR}")

    # Step 2: prove we can write there by touching and deleting a probe file.
    probe_file = TEMP_DIR / 'test_write'
    try:
        probe_file.touch()
        probe_file.unlink()
    except Exception as e:
        logger.error(f"Failed to write to directory {TEMP_DIR}: {e}")
        raise
    logger.info("Successfully verified write permissions")

    return str(TEMP_DIR)
|
|
|
|
|
|
# Import-time setup: create/verify the temp directory, then publish its path
# through the TEMP_LEXICON_DIR environment variable.
# NOTE(review): `zeroshot` is imported at the top of this file, before this
# variable is set -- if it reads TEMP_LEXICON_DIR at import time it will not
# see this value; confirm where the variable is consumed.
TEMP_PATH = ensure_temp_dir()
os.environ['TEMP_LEXICON_DIR'] = TEMP_PATH
logger.info(f"Set TEMP_LEXICON_DIR environment variable to {TEMP_PATH}")
|
|
|
|
def process_wrapper(audio, words_file, wscore, wscore_usedefault, reference):
    """Call ``process`` with the language-model options fixed and gather its output.

    The language-model related arguments are pinned (``lm_path=None``,
    ``lmscore=None``, ``lmscore_usedefault=True``, ``autolm=False``); the
    remaining arguments are forwarded from the caller unchanged.

    ``process`` is iterated as a stream of ``(transcription, log)`` pairs and
    every piece is concatenated in order.
    NOTE(review): if ``process`` yields cumulative text rather than increments,
    this concatenation repeats content -- confirm against ``zeroshot.process``.

    Returns:
        tuple: ``(transcription, logs)`` -- the joined transcription pieces and
        the joined log pieces.
    """
    pinned_lm_options = {
        "lm_path": None,
        "lmscore": None,
        "lmscore_usedefault": True,
        "autolm": False,
    }
    stream = process(
        audio_data=audio,
        words_file=words_file,
        wscore=wscore,
        wscore_usedefault=wscore_usedefault,
        reference=reference,
        **pinned_lm_options,
    )

    transcript_pieces = []
    log_pieces = []
    for piece, log_piece in stream:
        transcript_pieces.append(piece)
        log_pieces.append(log_piece)

    return "".join(transcript_pieces), "".join(log_pieces)
|
|
|
|
def create_gradio_interface():
    """Assemble the Gradio Blocks UI for the zero-shot ASR demo (without launching it).

    The page has a left column (audio input, text-data file, an "Advanced
    Settings" accordion with the word-insertion-score controls, and the submit
    button) and a right column (transcript, collapsible logs, and a hidden
    reference-transcript box that the examples fill in). Two example galleries
    (English and Ligurian) are attached below.

    NOTE(review): the source this was rebuilt from had lost its indentation, so
    the nesting of rows/columns here is a reconstruction -- confirm the layout.

    Returns:
        The configured ``gr.Blocks`` object.
    """
    # Example rows are plain data, so they are prepared before any component
    # is created; each row is [audio path, text-data path, reference transcript].
    english_reference = "This is going to look at the code that we have in our configuration that we've already exported and compare it to our database, and we want to import"
    english_examples = [
        ["upload/english/english.mp3", text_data, english_reference]
        for text_data in (
            "upload/english/c4_10k_sentences.txt",
            "upload/english/c4_5k_sentences.txt",
            "upload/english/gutenberg_27045.txt",
        )
    ]
    ligurian_examples = [
        [
            "upload/ligurian/ligurian_1.mp3",
            "upload/ligurian/zenamt_10k_sentences.txt",
            "I mæ colleghi m'an domandou d'aggiuttâli à fâ unna preuva co-o zeneise pe vedde s'o fonçioña.",
        ],
        [
            "upload/ligurian/ligurian_2.mp3",
            "upload/ligurian/zenamt_10k_sentences.txt",
            "Staseia vaggo à çenâ con mæ moggê e doî amixi che de chì à quarche settemaña faian stramuo feua stato.",
        ],
        [
            "upload/ligurian/ligurian_3.mp3",
            "upload/ligurian/zenamt_5k_sentences.txt",
            "Pe inandiâ o pesto ghe veu o baxaicò, i pigneu, l'euio, o formaggio, l'aggio e a sâ.",
        ],
    ]

    def make_wscore_slider(editable):
        # Single definition of the slider so the initial component and the
        # value returned by the checkbox handler cannot drift apart.
        return gr.Slider(
            label="Word Insertion Score",
            minimum=-10.0,
            maximum=10.0,
            step=0.1,
            value=WORD_SCORE_DEFAULT_IF_NOLM,
            interactive=editable,
        )

    with gr.Blocks(css="style.css") as demo:
        gr.Markdown(
            "<p align='center' style='font-size: 20px;'>MMS Zero-shot ASR Demo</p>"
        )
        gr.HTML(
            """<center>The demo works on input audio in any language, as long as you provide a list of words or sentences for that language.<br>We recommend having a minimum of 10000 sentences in the textfile to achieve a good performance.</center>"""
        )

        with gr.Row():
            # Left column: inputs and decoding settings.
            with gr.Column():
                audio = gr.Audio(
                    type="filepath",
                    label="Audio Input\n(use microphone or upload a file)",
                )

                with gr.Row():
                    words_file = gr.File(label="Text Data")

                with gr.Accordion("Advanced Settings", open=False):
                    gr.Markdown(
                        "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                    )
                    with gr.Row():
                        with gr.Column():
                            wscore_usedefault = gr.Checkbox(
                                value=True,
                                label="Use Default Word Insertion Score",
                            )
                            # Starts read-only because "use default" is ticked.
                            wscore = make_wscore_slider(False)

                submit_button = gr.Button("Submit", elem_id="submit")

                # No explicit trigger is given; the handler is wired only by
                # its inputs/outputs. It rebuilds the slider at the default
                # value and makes it editable only when the checkbox is off.
                @gr.on(
                    inputs=[wscore_usedefault],
                    outputs=[wscore],
                )
                def update_slider(ws):
                    return make_wscore_slider(not ws)

            # Right column: results.
            with gr.Column():
                transcript_box = gr.Textbox(label="Transcript")
                with gr.Accordion("Logs", open=False):
                    logs_box = gr.Textbox(show_label=False)

                # Invisible field; populated by the example rows below and
                # forwarded to process_wrapper.
                reference = gr.Textbox(label="Reference Transcript", visible=False)

        submit_button.click(
            fn=process_wrapper,
            inputs=[audio, words_file, wscore, wscore_usedefault, reference],
            outputs=[transcript_box, logs_box],
        )

        example_targets = [audio, words_file, reference]
        gr.Examples(
            examples=english_examples,
            inputs=example_targets,
            label="English",
        )
        gr.Examples(
            examples=ligurian_examples,
            inputs=example_targets,
            label="Ligurian",
        )

    return demo
|
|
|
|
def main():
    """Build the demo interface and start the Gradio server.

    The server is launched with ``server_name='0.0.0.0'``, ``server_port=7860``
    and ``show_error=True``.

    Raises:
        Exception: any error raised while building or launching the interface
            is logged and then re-raised.
    """
    launch_options = {
        "server_name": '0.0.0.0',
        "server_port": 7860,
        "show_error": True,
    }
    try:
        app = create_gradio_interface()
        app.launch(**launch_options)
    except Exception as exc:
        logger.error(f"Failed to launch Gradio interface: {exc}")
        raise
|
|
|
|
# Start the demo server only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()