Spaces:

akhaliq
/

wav2vec2-large-robust-ft-libri-960h

Runtime error

wav2vec2-large-robust-ft-libri-960h / app.py

Ahsen Khaliq

Create app.py

676bbaa over 3 years ago

1.64 kB

	import gradio as gr
	import soundfile as sf
	import torch
	from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

	# The processor and the CTC model are loaded from the same pretrained
	# checkpoint identifier, so spell it once and reuse it for both calls.
	_CHECKPOINT = "facebook/wav2vec2-large-robust-ft-libri-960h"
	processor = Wav2Vec2Processor.from_pretrained(_CHECKPOINT)
	model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)

	# define function to read in sound file
	def map_to_array(file):
	    """Read a sound file and return only its sample data.

	    Args:
	        file: Passed unchanged as the sole argument to ``sf.read``.

	    Returns:
	        The first element of the pair returned by ``sf.read``; the second
	        element is discarded.
	    """
	    return sf.read(file)[0]

	# transcribe an audio file with the loaded model
	def inference(audio):
	    """Transcribe the audio supplied through the Gradio interface.

	    Args:
	        audio: The value Gradio passes for the audio input. When it has a
	            ``name`` attribute, that attribute is used as the file to
	            read; otherwise the value itself is handed to
	            ``map_to_array``.

	    Returns:
	        The decoded transcription of the first (and only) batch item.

	    Raises:
	        ValueError: If ``audio`` is ``None`` (nothing was uploaded).
	    """
	    if audio is None:
	        raise ValueError("No audio was provided.")

	    # Use the caller's audio rather than a fixed sample path, so each
	    # request is transcribed from what was actually uploaded.
	    # NOTE(review): the interface declares type="file"; presumably Gradio
	    # passes a temp-file wrapper exposing its path as .name - confirm.
	    source = getattr(audio, "name", audio)
	    speech = map_to_array(source)

	    # tokenize (batch size 1)
	    input_values = processor(
	        speech, return_tensors="pt", padding="longest"
	    ).input_values

	    # retrieve logits; gradients are not needed when only predicting
	    with torch.no_grad():
	        logits = model(input_values).logits

	    # take argmax and decode
	    predicted_ids = torch.argmax(logits, dim=-1)
	    transcription = processor.batch_decode(predicted_ids)
	    return transcription[0]

	# Input/output widgets for the demo: one uploaded audio file in, one text
	# box out.
	# NOTE(review): gr.inputs / gr.outputs look like the legacy Gradio
	# namespaces; confirm they exist in the Gradio version this app runs on.
	inputs = gr.inputs.Audio(label="Input Audio", type="file")
	outputs = gr.outputs.Textbox(label="Output Text")

	title = "wav2vec 2.0"
	description = "demo for Facebook AI wav2vec 2.0 using Hugging Face transformers. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
	# HTML footer with reference links. The separators are plain "|" (a
	# backslash before "|" is an invalid escape sequence and would be rendered
	# literally), and the model link targets the checkpoint this app loads.
	article = (
	    "<p style='text-align: center'>"
	    "<a href='https://arxiv.org/abs/2006.11477'>wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations</a>"
	    " | <a href='https://github.com/pytorch/fairseq'>Github Repo</a>"
	    " | <a href='https://huggingface.co/facebook/wav2vec2-large-robust-ft-libri-960h'>Hugging Face model</a>"
	    "</p>"
	)
	# Each inner list is one example row; the single entry fills the audio input.
	examples = [
	    ["poem.wav"],
	]

	# Build the interface around `inference` and start serving it.
	gr.Interface(
	    inference,
	    inputs,
	    outputs,
	    title=title,
	    description=description,
	    article=article,
	    examples=examples,
	).launch()