# Spaces: Runtime error
# File size: 6,976 Bytes
# 4efe6b5
import os
import sys
import time
import tqdm
import torch
import torchcrepe
import numpy as np
from multiprocessing import Pool
from functools import partial
current_directory = os.getcwd()
sys.path.append(current_directory)
from rvc.lib.utils import load_audio
from rvc.lib.predictors.RMVPE import RMVPE0Predictor
# Positional command-line arguments, in order:
#   1. experiment directory
#   2. F0 method name
#   3. hop length (integer)
#   4. number of worker processes (integer)
#   5. "-"-separated GPU list; a lone "-" selects the CPU path in main()
exp_dir = sys.argv[1]
f0_method = sys.argv[2]
hop_length = int(sys.argv[3])
num_processes = int(sys.argv[4])
gpus = sys.argv[5]

# CUDA_VISIBLE_DEVICES wants commas where the CLI argument uses dashes.
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(gpus.split("-"))
class FeatureInput:
    """Pitch (F0) extractor that saves raw and coarse (quantised) F0 arrays.

    Attributes set by ``__init__``:
        fs: sample rate handed to CREPE.
        hop: hop size in samples; ``len(audio) // hop`` is the number of
            frames the CREPE result is resampled to.
        f0_bin: number of coarse pitch bins (256).
        f0_min, f0_max: pitch range (50.0 to 1100.0) passed to CREPE and used
            as the quantisation range.
        f0_mel_min, f0_mel_max: the same range mapped through
            ``1127 * ln(1 + f / 700)``.
        device: device string used for torch tensors and the RMVPE predictor.
        model_rmvpe: RMVPE predictor loaded from
            ``rvc/models/predictors/rmvpe.pt``.
    """

    def __init__(self, sample_rate=16000, hop_size=160, device="cpu"):
        """Store the extraction settings and load the RMVPE predictor.

        The predictor is loaded eagerly, whatever F0 method is used later,
        so a device that cannot hold the model fails here.
        """
        self.fs = sample_rate
        self.hop = hop_size
        self.f0_bin = 256
        self.f0_max = 1100.0
        self.f0_min = 50.0
        self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
        self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
        self.device = device
        self.model_rmvpe = RMVPE0Predictor(
            os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
            is_half=False,
            device=device,
        )

    def compute_f0(self, np_arr, f0_method, hop_length):
        """Extract F0 from ``np_arr`` with the requested method.

        Args:
            np_arr: 1-D audio samples as a NumPy array.
            f0_method: ``"crepe"`` or ``"rmvpe"``.
            hop_length: hop length forwarded to CREPE (unused by RMVPE).

        Returns:
            The F0 array produced by the selected extractor.

        Raises:
            ValueError: if ``f0_method`` is not a supported name.
        """
        p_len = np_arr.shape[0] // self.hop
        if f0_method == "crepe":
            return self.get_crepe(np_arr, p_len, hop_length)
        if f0_method == "rmvpe":
            return self.model_rmvpe.infer_from_audio(np_arr, thred=0.03)
        raise ValueError(f"Unknown F0 method: {f0_method}")

    def get_crepe(self, x, p_len, hop_length):
        """Extract F0 with torchcrepe and resample it to ``p_len`` frames.

        Args:
            x: 1-D audio samples as a NumPy array.
            p_len: number of frames wanted in the result.
            hop_length: hop length passed to ``torchcrepe.predict``.

        Returns:
            A NumPy array of ``p_len`` F0 values; frames without a usable
            estimate are 0.
        """
        audio = torch.from_numpy(x.astype(np.float32)).to(self.device)
        # Normalise by the 99.9th-percentile amplitude. A silent slice has a
        # zero percentile; dividing by it would turn every sample into NaN,
        # so leave such audio untouched.
        peak = torch.quantile(torch.abs(audio), 0.999)
        if peak > 0:
            audio = audio / peak
        audio = torch.unsqueeze(audio, dim=0)
        pitch = torchcrepe.predict(
            audio,
            self.fs,
            hop_length,
            self.f0_min,
            self.f0_max,
            "full",
            batch_size=hop_length * 2,
            device=self.device,
            pad=True,
        )
        source = pitch.squeeze(0).cpu().float().numpy()
        # Treat near-zero estimates as missing so they do not drag the
        # interpolated neighbours towards zero.
        source[source < 0.001] = np.nan
        # Linearly resample from CREPE's frame count to p_len frames.
        target = np.interp(
            np.arange(0, len(source) * p_len, len(source)) / p_len,
            np.arange(0, len(source)),
            source,
        )
        # Missing (NaN) frames become 0.
        return np.nan_to_num(target)

    def coarse_f0(self, f0):
        """Quantise F0 values into integer bins in ``[1, f0_bin - 1]``.

        Values are mapped through ``1127 * ln(1 + f / 700)`` and scaled
        linearly so that ``f0_min`` lands on 1 and ``f0_max`` on
        ``f0_bin - 1``. Zero and out-of-range values are clamped to the
        nearest end of that range. The input array is not modified.

        Args:
            f0: NumPy array of F0 values.

        Returns:
            Integer NumPy array with the same shape as ``f0``.
        """
        f0_mel = 1127 * np.log(1 + f0 / 700)
        voiced = f0_mel > 0
        f0_mel[voiced] = (f0_mel[voiced] - self.f0_mel_min) * (
            self.f0_bin - 2
        ) / (self.f0_mel_max - self.f0_mel_min) + 1
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1
        f0_coarse = np.rint(f0_mel).astype(int)
        # Sanity check: trips when the input contained NaN, for example.
        assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (
            f0_coarse.max(),
            f0_coarse.min(),
        )
        return f0_coarse

    def process_file(self, file_info, f0_method, hop_length):
        """Extract and save F0 for one audio file.

        Args:
            file_info: ``(inp_path, opt_path1, opt_path2, np_arr)`` where
                ``opt_path1`` receives the coarse F0, ``opt_path2`` the raw
                F0, and ``np_arr`` holds the already-loaded audio samples.
            f0_method: F0 method name, see ``compute_f0``.
            hop_length: hop length forwarded to ``compute_f0``.

        Failures are reported on stdout and never raised, so one bad file
        does not stop a batch.
        """
        inp_path, opt_path1, opt_path2, np_arr = file_info
        # np.save appends ".npy" to these paths, hence the suffix here.
        if os.path.exists(opt_path1 + ".npy") and os.path.exists(opt_path2 + ".npy"):
            return
        try:
            feature_pit = self.compute_f0(np_arr, f0_method, hop_length)
            # Quantise before writing anything, so a failure here cannot
            # leave a raw F0 file without its coarse counterpart.
            coarse_pit = self.coarse_f0(feature_pit)
            np.save(opt_path2, feature_pit, allow_pickle=False)
            np.save(opt_path1, coarse_pit, allow_pickle=False)
        except Exception as error:
            print(f"An error occurred extracting file {inp_path}: {error}")

    def process_files(self, files, f0_method, hop_length, pbar):
        """Run ``process_file`` on every entry of ``files``.

        ``pbar`` is advanced by one step after each file.
        """
        for file_info in files:
            self.process_file(file_info, f0_method, hop_length)
            pbar.update()
def _make_feature_input(device, cuda_index):
    """Build a FeatureInput on ``device``, falling back to CPU if unusable."""
    if torch.cuda.is_available() and torch.cuda.device_count() > cuda_index:
        try:
            return FeatureInput(device=device)
        except Exception as error:
            print(
                f"Oops, there was an issue initializing GPU {device} ({error}). Maybe you don't have a GPU? No worries, switching to CPU for now."
            )
    else:
        print(f"GPU {device} is not available. Switching to CPU.")
    return FeatureInput(device="cpu")


def main(exp_dir, f0_method, hop_length, num_processes, gpus):
    """Extract F0 for every sliced audio file of an experiment.

    Reads audio from ``<exp_dir>/sliced_audios_16k`` (names containing
    ``"spec"`` are skipped) and writes coarse F0 to ``<exp_dir>/f0`` and raw
    F0 to ``<exp_dir>/f0_voiced``.

    Args:
        exp_dir: experiment directory.
        f0_method: F0 method name forwarded to ``FeatureInput``.
        hop_length: hop length forwarded to ``FeatureInput``.
        num_processes: worker process count for the CPU path.
        gpus: ``"-"``-separated GPU ids, or a lone ``"-"`` to run on CPU
            with a process pool.
    """
    input_root = os.path.join(exp_dir, "sliced_audios_16k")
    output_root1 = os.path.join(exp_dir, "f0")
    output_root2 = os.path.join(exp_dir, "f0_voiced")
    os.makedirs(output_root1, exist_ok=True)
    os.makedirs(output_root2, exist_ok=True)

    # Each entry: [input path, coarse-F0 path, raw-F0 path, audio samples].
    paths = []
    for name in sorted(os.listdir(input_root)):
        if "spec" in name:
            continue
        input_path = os.path.join(input_root, name)
        output_path1 = os.path.join(output_root1, name)
        output_path2 = os.path.join(output_root2, name)
        np_arr = load_audio(input_path, 16000)
        paths.append([input_path, output_path1, output_path2, np_arr])

    print(f"Starting extraction with {num_processes} cores and {f0_method}...")
    start_time = time.time()

    if gpus != "-":
        gpu_ids = gpus.split("-")
        num_gpus = len(gpu_ids)
        # When CUDA_VISIBLE_DEVICES holds this same list, CUDA renumbers the
        # selected GPUs from 0, so the device ordinal is the position in the
        # list and not the physical id (e.g. "2-3" -> cuda:0, cuda:1).
        renumbered = os.environ.get("CUDA_VISIBLE_DEVICES") == ",".join(gpu_ids)
        with tqdm.tqdm(total=len(paths), desc="Pitch Extraction") as pbar:
            workers = []
            for idx, gpu in enumerate(gpu_ids):
                device = f"cuda:{idx}" if renumbered else f"cuda:{gpu}"
                # Files are dealt out round-robin across the listed devices.
                workers.append(
                    (_make_feature_input(device, idx), paths[idx::num_gpus])
                )
            # NOTE: the per-device shares are processed one after another.
            for feature_input, part_paths in workers:
                feature_input.process_files(part_paths, f0_method, hop_length, pbar)
    else:
        feature_input = FeatureInput(device="cpu")
        process_file_partial = partial(
            feature_input.process_file, f0_method=f0_method, hop_length=hop_length
        )
        # The context manager tears the pool down even if iteration raises.
        with tqdm.tqdm(total=len(paths), desc="Pitch Extraction") as pbar, Pool(
            processes=num_processes
        ) as pool:
            for _ in pool.imap_unordered(process_file_partial, paths):
                pbar.update()
            pool.close()
            pool.join()

    elapsed_time = time.time() - start_time
    print(f"Pitch extraction completed in {elapsed_time:.2f} seconds.")
# Script entry point: run the extraction with the arguments parsed at module
# level.
if __name__ == "__main__":
    main(exp_dir, f0_method, hop_length, num_processes, gpus)