"""Realtime French speech-to-text demo served through a Gradio interface."""

import logging
import time
import warnings
from typing import Optional, Tuple

import gradio as gr
from transformers import pipeline
from transformers.utils.logging import disable_progress_bar

# Keep the console output quiet: silence Python warnings and the
# transformers progress bars.
warnings.filterwarnings("ignore")
disable_progress_bar()

# The date format below ends in a literal "Z" (the UTC designator), so the
# timestamps must be rendered in UTC instead of logging's default local time.
logging.Formatter.converter = time.gmtime
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] [%(name)s] %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%SZ",
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# The ASR pipeline is built once at import time and reused by every call to
# `transcribe`.
pipe = pipeline(model="bofenghuang/asr-wav2vec2-ctc-french")
logger.info("ASR pipeline has been initialized")


def transcribe(audio: Optional[str], state: Optional[str] = "") -> Tuple[str, str]:
    """Transcribe one audio input and append it to the running transcript.

    Args:
        audio: Value produced by the audio input component, which is
            configured with ``type="filepath"``. ``None`` is tolerated and
            leaves the transcript untouched.
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.

    Returns:
        The updated transcript twice: once for the textbox output and once
        for the state output of the interface.
    """
    if state is None:
        state = ""

    # NOTE(review): a live/streaming input presumably delivers None when no
    # audio was captured; skip the model call instead of raising on it.
    if audio is None:
        return state, state

    text = pipe(audio, chunk_length_s=5, stride_length_s=1)["text"]
    state += text + " "
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logger.info("Transcription for %s: %s", audio, state)
    return state, state


# Streaming mode: with live=True the interface calls `transcribe` as audio
# arrives from the microphone, threading the transcript through "state".
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(
            source="microphone",
            type="filepath",
            streaming=True,
            label="Record something...",
        ),
        "state",
    ],
    outputs=["textbox", "state"],
    title="Realtime Speech-to-Text in French",
    description="Realtime demo for French automatic speech recognition.",
    allow_flagging="never",
    live=True,
)

# Alternative launch that binds to all interfaces, enables debug mode and
# creates a public share link:
# iface.launch(server_name="0.0.0.0", debug=True, share=True)
iface.launch()