Spaces:

juancopi81
/

youtube-music-transcribe

Build error

App Files Files Community

youtube-music-transcribe / utils.py

juancopi81

Add plotting function

d9489e4 over 2 years ago

raw

history blame

3.41 kB


	import tempfile
	import collections

	import librosa

	import pandas as pd
	import matplotlib.pyplot as plt
	from matplotlib.patches import Rectangle
	from PIL import Image

	class AudioIOReadError(Exception):  # pylint:disable=g-bad-exception-name
	    """Raised when an audio file cannot be read or decoded.

	    Derives from ``Exception`` rather than ``BaseException`` so that ordinary
	    ``except Exception`` handlers can catch it; ``BaseException`` is meant for
	    interpreter-level signals such as ``KeyboardInterrupt`` and ``SystemExit``.
	    """

	def upload_audio(audio, sample_rate):
	    """Convert uploaded audio content into samples at ``sample_rate``.

	    This is a thin wrapper that delegates all work to
	    ``wav_data_to_samples_librosa``.

	    Args:
	      audio: Audio content, forwarded unchanged as the file data to decode.
	      sample_rate: Target number of samples per second.

	    Returns:
	      Whatever ``wav_data_to_samples_librosa`` returns for the given input.
	    """
	    samples = wav_data_to_samples_librosa(audio, sample_rate=sample_rate)
	    return samples

	def wav_data_to_samples_librosa(audio_file, sample_rate):
	    """Loads in-memory audio data with librosa.

	    The data is copied to a local temporary ``.wav`` file first, because the
	    loader is given a file path rather than an in-memory buffer. The data is
	    therefore treated as a .wav file.

	    Because ``load_audio`` is called without a ``duration`` argument, its
	    default duration limit applies.

	    Args:
	      audio_file: Bytes-like audio content; it is written verbatim to a binary
	        temporary file.
	      sample_rate: The number of samples per second at which the audio will be
	        returned. Resampling will be performed if necessary.

	    Returns:
	      The samples returned by ``load_audio`` for the temporary file.

	    Raises:
	      AudioIOReadError: If librosa is unable to load the audio data.
	    """
	    import os  # Local import: only needed here to remove the temp file.

	    # delete=False lets us close the file before it is reopened by name.
	    # Reopening a NamedTemporaryFile that is still open is not portable
	    # (it fails on Windows), so we close first and clean up manually.
	    wav_input_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
	    try:
	        with wav_input_file:
	            # Leaving the `with` block closes the file, which also flushes it.
	            wav_input_file.write(audio_file)
	        return load_audio(wav_input_file.name, sample_rate)
	    finally:
	        # Always remove the temp file, whether loading succeeded or failed.
	        os.remove(wav_input_file.name)

	def load_audio(audio_filename, sample_rate, duration=10):
	    """Loads an audio file with librosa.

	    Args:
	      audio_filename: File path to load.
	      sample_rate: The number of samples per second at which the audio will be
	        returned. Resampling will be performed if necessary.
	      duration: Forwarded to ``librosa.load`` as its ``duration`` argument,
	        which limits how much audio (in seconds) is read. Defaults to 10.

	    Returns:
	      A numpy array of audio samples, single-channel (mono) and sampled at the
	      specified rate, in float32 format.

	    Raises:
	      AudioIOReadError: If librosa is unable to load the audio data. The
	        original exception is attached as ``__cause__``.
	    """
	    try:
	        samples, _ = librosa.load(
	            audio_filename, sr=sample_rate, mono=True, duration=duration)
	    except Exception as e:  # pylint: disable=broad-except
	        # Chain explicitly so the underlying librosa failure stays visible
	        # as the direct cause in tracebacks.
	        raise AudioIOReadError(e) from e
	    return samples

	# Generate piano_roll
	def sequence_to_pandas_dataframe(sequence):
	    """Build a DataFrame with one row per note in ``sequence``.

	    Args:
	      sequence: Object exposing an iterable ``notes`` attribute whose items
	        have ``start_time``, ``end_time`` and ``pitch`` attributes.

	    Returns:
	      A ``pandas.DataFrame`` with the columns ``start_time``, ``end_time``,
	      ``duration`` (``end_time - start_time``) and ``pitch``, in that order.
	      The columns are present even when ``sequence.notes`` is empty.
	    """
	    # Pre-seed every column so an empty sequence still yields the full schema;
	    # otherwise the result would have no columns at all and attribute access
	    # such as ``df.start_time`` would fail.
	    pd_dict = {
	        "start_time": [],
	        "end_time": [],
	        "duration": [],
	        "pitch": [],
	    }
	    for note in sequence.notes:
	        pd_dict["start_time"].append(note.start_time)
	        pd_dict["end_time"].append(note.end_time)
	        pd_dict["duration"].append(note.end_time - note.start_time)
	        pd_dict["pitch"].append(note.pitch)

	    return pd.DataFrame(pd_dict)

	def dataframe_to_pianoroll_img(df):
	    """Draw a piano-roll style plot of the notes in ``df``.

	    Args:
	      df: DataFrame with ``start_time``, ``pitch`` and ``duration`` columns,
	        one row per note.

	    Returns:
	      The Matplotlib figure containing the plot. The figure is created through
	      pyplot and is not closed here; the caller owns it.
	    """
	    fig = plt.figure(figsize=(8, 5))
	    ax = fig.add_subplot(111)
	    # The white scatter points are not meant to be seen; plotting them makes
	    # the axes autoscale to the notes, since add_patch alone does not.
	    ax.scatter(df.start_time, df.pitch, c="white")
	    # Iterate the three needed columns directly instead of building a Series
	    # per row with iterrows().
	    for start_time, pitch, duration in zip(
	            df["start_time"], df["pitch"], df["duration"]):
	        ax.add_patch(
	            Rectangle((start_time, pitch - 0.4), duration, 0.4, color="black"))
	    # Label this specific axes rather than pyplot's global "current axes",
	    # which may not be the one created above.
	    ax.set_xlabel('time (sec.)', fontsize=18)
	    ax.set_ylabel('pitch (MIDI)', fontsize=16)
	    return fig

	def fig2img(fig):
	    """Render a Matplotlib figure as PNG and return it as a PIL Image.

	    Args:
	      fig: Figure object providing ``savefig``.

	    Returns:
	      The image obtained by opening the in-memory PNG with ``Image.open``.
	    """
	    import io

	    png_buffer = io.BytesIO()
	    fig.savefig(png_buffer, format="png")
	    # Rewind so the image reader starts at the beginning of the PNG data.
	    png_buffer.seek(0)
	    return Image.open(png_buffer)

	def create_image_from_note_sequence(sequence):
	    """Render the notes of ``sequence`` as a piano-roll image.

	    The sequence is converted to a DataFrame, plotted as a Matplotlib figure,
	    and the figure is converted to a PIL image.

	    Args:
	      sequence: Note sequence passed to ``sequence_to_pandas_dataframe``.

	    Returns:
	      The image returned by ``fig2img`` for the piano-roll figure.
	    """
	    df_sequence = sequence_to_pandas_dataframe(sequence)
	    fig = dataframe_to_pianoroll_img(df_sequence)
	    try:
	        return fig2img(fig)
	    finally:
	        # Figures created through pyplot stay registered until closed, so
	        # release this one once it has been rasterised to avoid accumulating
	        # figures across calls.
	        plt.close(fig)