Spaces:
Runtime error
Runtime error
from transformers import pipeline, Wav2Vec2ProcessorWithLM | |
from librosa import to_mono, resample | |
import numpy as np | |
import gradio as gr | |
# Bilingual (English / Ukrainian) description shown in the app UI.
DESC = """\
Ukrainian speech recognition app/
Розпізнавання голосу для української мови
"""

# Checkpoint identifier shared by the processor and the ASR pipeline.
model_id = "arampacha/wav2vec2-xls-r-1b-uk"

# The processor supplies both the feature extractor and the LM-backed decoder
# that are handed to the pipeline below.
processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)

# Speech-recognition pipeline used by run_asr.
# NOTE(review): device=-1 is the transformers convention for CPU — confirm
# against the installed transformers version.
asr = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    feature_extractor=processor.feature_extractor,
    decoder=processor.decoder,
    device=-1,
)
def run_asr(audio):
    """Transcribe one audio recording with the module-level ``asr`` pipeline.

    Args:
        audio: ``(sample_rate, samples)`` pair, or ``None`` when nothing was
            recorded. ``samples`` is array-like, either 1-D (mono) or 2-D
            with the channel count on axis 1.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string when
        there is no audio to transcribe.

    Raises:
        ValueError: If ``samples`` has more than two channels.
    """
    # No recording was submitted: return an empty transcript instead of
    # failing on tuple unpacking.
    if audio is None:
        return ""

    sr, audio_array = audio
    audio_array = np.asarray(audio_array, dtype=np.float32)

    # A zero-length clip has nothing to transcribe; skip resampling and the
    # model call entirely.
    if audio_array.size == 0:
        return ""

    if audio_array.ndim > 1:
        n_channels = audio_array.shape[1]
        if n_channels == 1:
            # reshape(-1) rather than squeeze(): squeeze() would turn a
            # single-sample (1, 1) clip into a 0-d array.
            audio_array = audio_array.reshape(-1)
        elif n_channels == 2:
            # to_mono is given the transposed array, i.e. (channels, samples).
            audio_array = to_mono(audio_array.T)
        else:
            raise ValueError("Audio with > 2 channels not supported")

    # The pipeline is always fed 16 kHz audio.
    if sr != 16_000:
        audio_array = resample(audio_array, orig_sr=sr, target_sr=16_000)

    res = asr(audio_array, chunk_length_s=20, stride_length_s=2)
    return res["text"]
# Output widget that displays the text returned by run_asr.
text_out = gr.outputs.Textbox(label="transcript")

# Microphone-in / text-out interface around run_asr.
interface = gr.Interface(
    fn=run_asr,
    inputs="microphone",
    outputs=text_out,
    title="Speech-to-text Ukrainian",
    description=DESC,
    layout="horizontal",
    theme="huggingface",
    examples=["examples/dobryi_ranok.wav"],
    flagging_options=["incorrect"],
)

# Start the app as soon as the script runs.
interface.launch(debug=True)