# Hugging Face Space status at capture time: Runtime error
import functools
import logging
import re

import gradio as gr
import librosa
import numpy as np
import pandas as pd
import torch
import torchaudio
from datasets import load_dataset
from noisereduce.torchgate import TorchGate as TG
from pydub import AudioSegment
from sklearn.model_selection import train_test_split
from torchaudio.transforms import Resample
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
_logger = logging.getLogger(__name__)

# Fine-tuned Whisper checkpoint used for transcription.
_MODEL_NAME = "rikeshsilwalekg/whisper-small-wer35-ekg"
# Sampling rate the audio is converted to before it is handed to the pipeline.
_TARGET_SAMPLE_RATE = 16000


def _inference_device():
    """Return the CUDA device when one is available, otherwise the CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")


@functools.lru_cache(maxsize=1)
def _load_asr_pipeline():
    """Build the speech-recognition pipeline once and reuse it afterwards.

    Loading the checkpoint is by far the most expensive step, so the result
    is cached instead of being rebuilt on every request.
    """
    device = _inference_device()
    # Half precision only when running on a GPU.
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        _MODEL_NAME, torch_dtype=torch_dtype, use_safetensors=True
    )
    model.to(device)
    processor = AutoProcessor.from_pretrained(_MODEL_NAME)

    # return_timestamps=True gives sentence-level timestamps;
    # return_timestamps="word" gives word-level timestamps.
    return pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=30,
        batch_size=16,
        return_timestamps=False,
        torch_dtype=torch_dtype,
        device=device,
    )


def _reduce_noise(samples, sampling_rate, device):
    """Run the TorchGate noise gate over *samples*, best effort.

    The gate operates on a ``(batch, samples)`` torch tensor, so the mono
    NumPy signal is wrapped in a batch of one and unwrapped afterwards.
    If the gate fails for any reason the original signal is returned
    unchanged and the failure is logged, so transcription can still proceed.
    """
    try:
        gate = TG(sr=sampling_rate, nonstationary=True).to(device)
        batch = torch.from_numpy(samples).unsqueeze(0).to(device)
        with torch.no_grad():
            cleaned = gate(batch)
        return cleaned.squeeze(0).cpu().numpy()
    except Exception:  # best-effort step: never block transcription
        _logger.warning(
            "Noise reduction failed; transcribing the raw audio instead.",
            exc_info=True,
        )
        return samples


def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file to text.

    Args:
        audio_file: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``, e.g. when nothing was uploaded) is accepted.

    Returns:
        The transcription produced by the speech-recognition pipeline, or an
        empty string when no file was provided.
    """
    if not audio_file:
        return ""

    device = _inference_device()
    input_arr, sampling_rate = librosa.load(audio_file)

    input_arr = _reduce_noise(input_arr, sampling_rate, device)

    # The raw array handed to the pipeline carries no sampling-rate metadata,
    # so it has to be at the target rate already.
    if sampling_rate != _TARGET_SAMPLE_RATE:
        input_arr = librosa.resample(
            input_arr, orig_sr=sampling_rate, target_sr=_TARGET_SAMPLE_RATE
        )

    prediction = _load_asr_pipeline()(input_arr)
    return prediction["text"]
# Build and launch the web UI: one audio upload in, one textbox of text out.
#
# `gr.Audio` is used instead of the legacy `gr.inputs.Audio`, because the
# `gr.inputs` namespace is not available in current Gradio releases. The
# source keyword is left at its default since its spelling differs between
# Gradio major versions; uploading a file is supported by default.
audio_input = gr.Audio(type="filepath")
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=["textbox"],
    title="Nepali Speech To Text",
    # Single literal: a backslash continuation inside the string would make
    # the displayed text depend on the next line's indentation.
    description="Upload an audio file and hit the 'Submit' button",
)
iface.launch(inline=False)