# File size: 8,189 Bytes
# 6f03d40
import gradio as gr
from zeroshot import (
process,
WORD_SCORE_DEFAULT_IF_NOLM,
)
import os
import logging
from pathlib import Path
# Set up process-wide logging: INFO threshold, with timestamp, logger name and
# level in front of every message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Logger used by everything in this module, named after the module itself.
logger = logging.getLogger(__name__)
# Working directory that is created at startup and exported as TEMP_LEXICON_DIR.
# A non-empty TEMP_LEXICON_DIR already present in the environment takes
# precedence, so the app can run on machines that have no D: drive; otherwise
# the original hard-coded location is used as the default.
TEMP_DIR = Path(os.environ.get("TEMP_LEXICON_DIR") or "D:/Ngen/bot/temp_lexicon")
def ensure_temp_dir():
    """Create TEMP_DIR if it is missing and verify that it is writable.

    The check runs in two steps, each with its own error message, so a
    failure is logged exactly once and names the step that failed.

    Returns:
        str: The path of TEMP_DIR.

    Raises:
        Exception: Whatever ``mkdir``, ``touch`` or ``unlink`` raised
            (typically an ``OSError``). The error is logged and re-raised
            unchanged.
    """
    # Step 1: make sure the directory (and any missing parents) exists.
    try:
        TEMP_DIR.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Failed to create or access directory {TEMP_DIR}: {e}")
        raise
    logger.info(f"Created or verified temp directory at {TEMP_DIR}")

    # Step 2: prove write access by creating and removing a marker file.
    probe = TEMP_DIR / 'test_write'
    try:
        probe.touch()
        probe.unlink()  # Remove the marker again so nothing is left behind
    except Exception as e:
        logger.error(f"Failed to write to directory {TEMP_DIR}: {e}")
        raise
    logger.info("Successfully verified write permissions")

    return str(TEMP_DIR)
# Create the temporary directory at import time; if ensure_temp_dir() raises,
# the error propagates and the module fails to load.
TEMP_PATH = ensure_temp_dir()
# Publish the directory through the environment.
# NOTE(review): presumably consumed by the zeroshot module - confirm. zeroshot
# is imported before this assignment runs, so it would have to read the
# variable lazily rather than at import time.
os.environ['TEMP_LEXICON_DIR'] = TEMP_PATH
logger.info(f"Set TEMP_LEXICON_DIR environment variable to {TEMP_PATH}")
def process_wrapper(audio, words_file, wscore, wscore_usedefault, reference):
    """Run ``process`` with the language-model options fixed and return its final result.

    The language-model parameters are pinned (``lm_path=None``,
    ``lmscore=None``, ``lmscore_usedefault=True``, ``autolm=False``); the
    remaining arguments are forwarded unchanged.

    Args:
        audio: Value of the audio input component (configured with
            ``type="filepath"`` in the UI).
        words_file: Value of the "Text Data" file component.
        wscore: Word insertion score chosen on the slider.
        wscore_usedefault: Whether the default word insertion score is used.
        reference: Reference transcript text.

    Returns:
        tuple: ``(transcription, logs)`` taken from the last item yielded by
        ``process``, or two empty strings if it yields nothing.
    """
    generator = process(
        audio_data=audio,
        words_file=words_file,
        lm_path=None,
        wscore=wscore,
        lmscore=None,
        wscore_usedefault=wscore_usedefault,
        lmscore_usedefault=True,
        autolm=False,
        reference=reference,
    )

    # Keep only the last result from the generator instead of concatenating
    # every intermediate one, which would repeat earlier output.
    # NOTE(review): assumes every item yielded by process is the complete
    # (transcription, logs) state so far, as a streaming Gradio handler would
    # yield - confirm against zeroshot.process.
    transcription, logs = "", ""
    for transcription, logs in generator:
        pass
    return transcription, logs
def create_gradio_interface():
    """Assemble the Blocks UI of the demo and return it without launching.

    The page has an input column (audio, text data, advanced decoding
    settings, submit button), an output column (transcript, logs, hidden
    reference transcript) and two sets of clickable examples.

    NOTE(review): the container nesting below was reconstructed from a copy of
    the file that had lost its indentation - confirm the rendered layout.

    Returns:
        The configured ``gr.Blocks`` instance.
    """
    # Settings common to the slider shown initially and to the replacement
    # slider returned by update_slider; only `interactive` differs.
    slider_settings = dict(
        minimum=-10.0,
        maximum=10.0,
        value=WORD_SCORE_DEFAULT_IF_NOLM,
        step=0.1,
        label="Word Insertion Score",
    )

    # Example rows are [audio path, text data path, reference transcript].
    english_reference = "This is going to look at the code that we have in our configuration that we've already exported and compare it to our database, and we want to import"
    english_examples = [
        ["upload/english/english.mp3", words_path, english_reference]
        for words_path in (
            "upload/english/c4_10k_sentences.txt",
            "upload/english/c4_5k_sentences.txt",
            "upload/english/gutenberg_27045.txt",
        )
    ]
    ligurian_examples = [
        [
            "upload/ligurian/ligurian_1.mp3",
            "upload/ligurian/zenamt_10k_sentences.txt",
            "I mæ colleghi m'an domandou d'aggiuttâli à fâ unna preuva co-o zeneise pe vedde s'o fonçioña.",
        ],
        [
            "upload/ligurian/ligurian_2.mp3",
            "upload/ligurian/zenamt_10k_sentences.txt",
            "Staseia vaggo à çenâ con mæ moggê e doî amixi che de chì à quarche settemaña faian stramuo feua stato.",
        ],
        [
            "upload/ligurian/ligurian_3.mp3",
            "upload/ligurian/zenamt_5k_sentences.txt",
            "Pe inandiâ o pesto ghe veu o baxaicò, i pigneu, l'euio, o formaggio, l'aggio e a sâ.",
        ],
    ]

    with gr.Blocks(css="style.css") as demo:
        # Page header and short usage note.
        gr.Markdown(
            "<p align='center' style='font-size: 20px;'>MMS Zero-shot ASR Demo</p>"
        )
        gr.HTML(
            """<center>The demo works on input audio in any language, as long as you provide a list of words or sentences for that language.<br>We recommend having a minimum of 10000 sentences in the textfile to achieve a good performance.</center>"""
        )

        with gr.Row():
            # Input column.
            with gr.Column():
                audio = gr.Audio(
                    label="Audio Input\n(use microphone or upload a file)",
                    type="filepath",
                )
                with gr.Row():
                    words_file = gr.File(label="Text Data")

                # Beam-search decoding options, collapsed by default.
                with gr.Accordion("Advanced Settings", open=False):
                    gr.Markdown(
                        "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                    )
                    with gr.Row():
                        with gr.Column():
                            wscore_usedefault = gr.Checkbox(
                                label="Use Default Word Insertion Score",
                                value=True,
                            )
                            # Starts locked because the checkbox starts ticked.
                            wscore = gr.Slider(interactive=False, **slider_settings)

                btn = gr.Button("Submit", elem_id="submit")

                # No explicit triggers are given, so Gradio's default for
                # gr.on applies (listening to changes of the listed inputs).
                @gr.on(
                    inputs=[wscore_usedefault],
                    outputs=[wscore],
                )
                def update_slider(ws):
                    # Reset the slider to the default score; it is editable
                    # only while "use default" is unticked.
                    return gr.Slider(interactive=not ws, **slider_settings)

            # Output column.
            with gr.Column():
                text = gr.Textbox(label="Transcript")
                with gr.Accordion("Logs", open=False):
                    logs = gr.Textbox(show_label=False)
                # Hidden field; it is filled by the examples and passed on to
                # process_wrapper.
                reference = gr.Textbox(label="Reference Transcript", visible=False)

        # Run the recognizer when the submit button is clicked.
        btn.click(
            fn=process_wrapper,
            inputs=[
                audio,
                words_file,
                wscore,
                wscore_usedefault,
                reference,
            ],
            outputs=[text, logs],
        )

        # Clickable example inputs, one group per language.
        example_targets = [audio, words_file, reference]
        gr.Examples(
            examples=english_examples,
            inputs=example_targets,
            label="English",
        )
        gr.Examples(
            examples=ligurian_examples,
            inputs=example_targets,
            label="Ligurian",
        )

    return demo
def main():
    """Build the Gradio interface and serve it on 0.0.0.0:7860.

    Any exception raised while building or launching the interface is logged
    and then re-raised.
    """
    # Fixed host and port; show_error is switched on for the launched app.
    launch_options = {
        "server_name": '0.0.0.0',
        "server_port": 7860,
        "show_error": True,
    }
    try:
        create_gradio_interface().launch(**launch_options)
    except Exception as e:
        logger.error(f"Failed to launch Gradio interface: {e}")
        raise
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()