File size: 3,218 Bytes
1582553 f532fdf 1582553 f532fdf 1582553 3c310e8 1582553 3c310e8 1582553 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import numpy as np
import iSparrow.preprocessor_base as ppb
class Preprocessor(ppb.PreprocessorBase):
    """
    Preprocessor Preprocess audio data into resampled chunks for analysis.

    The raw signal is cut into windows of `sample_secs` seconds that may
    overlap by `overlap` seconds. A trailing window that is shorter than the
    full length but at least 1.5 seconds long is zero-padded; anything shorter
    is dropped.
    """

    def __init__(
        self,
        sample_rate: int = 48000,
        overlap: float = 0.0,
        sample_secs: float = 3.0,
        resample_type: str = "kaiser_fast",
        **kwargs
    ):
        """
        __init__ Construct a new preprocessor for custom birdnet classifiers from given parameters, and use defaults for the ones not present.

        Args:
            sample_rate (int, optional): The sample rate used to resample the read audio file. Defaults to 48000.
            overlap (float, optional): Overlap between chunks to be analyzed, in seconds. Defaults to 0.0.
            sample_secs (float, optional): Length of chunks to be analyzed at once, in seconds. Defaults to 3.0.
            resample_type (str, optional): Resampling method used when reading from file. Defaults to "kaiser_fast".
            **kwargs: Additional keyword arguments, forwarded unchanged to the base class constructor.
        """
        super().__init__(
            "birdnet_default",
            sample_rate=sample_rate,
            overlap=overlap,
            sample_secs=sample_secs,
            resample_type=resample_type,
            **kwargs
        )

    def process_audio_data(self, rawdata: np.ndarray) -> list:
        """
        process_audio_data Process raw, resampled audio data into chunks that then can be analyzed

        The resulting list is also stored on the instance as `self.chunks`.

        Args:
            rawdata (np.ndarray): raw, resampled audio data as returned from 'read_audio'

        Raises:
            ValueError: If `overlap` is so large relative to `sample_secs` that
                the hop between consecutive chunks is not at least one sample.

        Returns:
            list: chunked audio data
        """
        print("process audio data default", flush=True)

        # Shortest trailing remainder (in seconds) that is still kept and padded.
        minlen = 1.5

        # NOTE(review): presumably `actual_sampling_rate` is set by the base
        # class when the audio is read - confirm. All sample counts below are
        # derived from this one rate so that hop and window stay consistent.
        rate = self.actual_sampling_rate
        chunk_len = int(self.sample_secs * rate)
        min_len = int(minlen * rate)
        step = int((self.sample_secs - self.overlap) * rate)

        self.chunks = []

        # A zero step would make range() fail with an unhelpful message and a
        # negative step would silently yield no chunks at all.
        if step <= 0:
            raise ValueError(
                "overlap must be smaller than sample_secs to chunk audio data, "
                f"got overlap={self.overlap} and sample_secs={self.sample_secs}"
            )

        for start in range(0, len(rawdata), step):
            split = rawdata[start : start + chunk_len]

            # End of signal?
            if len(split) < min_len:
                break

            # Signal chunk too short? Fill with zeros.
            if len(split) < chunk_len:
                padded = np.zeros(chunk_len)
                padded[: len(split)] = split
                split = padded

            self.chunks.append(split)

        print(
            "process audio data default: complete, read ",
            str(len(self.chunks)),
            "chunks.",
            flush=True,
        )
        return self.chunks

    @classmethod
    def from_cfg(cls, cfg: dict):
        """
        from_cfg Construct a new preprocessor from a given dictionary. This represents typically a config node read from a YAML file.

        Args:
            cfg (dict): Config node read from a YAML file. Its keys are passed
                to the constructor as keyword arguments.

        Raises:
            RuntimeError: If `cfg` contains a key that is not an allowed
                constructor argument.

        Returns: new preprocessor instance
        """
        allowed = {
            "sample_rate",
            "overlap",
            "sample_secs",
            "resample_type",
            "duration",
            "actual_sampling_rate",
        }

        if any(key not in allowed for key in cfg):
            raise RuntimeError("Erroneous keyword arguments in preprocessor config")

        return cls(**cfg)
|