Spaces:

wetdog
/

knn-voice-conversion

Sleeping

App Files Files Community

LuisVasquezBSC committed on Jul 11

Commit

a611372

•

1 Parent(s): 2bbd427

Add conversion to 16k mono

Browse files

The models require audio input sampled at 16 kHz with a single channel (mono).
This commit adds utilities that convert any audio file into that format.

Files changed (1) hide show

app.py +31 -0

app.py CHANGED Viewed

@@ -5,6 +5,32 @@ from typing import List
 import soundfile as sf
 import gradio as gr
 import tempfile
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device=device)
@@ -12,6 +38,11 @@ knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=T
 def convert_voice(src_wav_path:str, ref_wav_paths, top_k:int):
     query_seq = knn_vc.get_features(src_wav_path)
     matching_set = knn_vc.get_matching_set([ref_wav_paths])
     out_wav = knn_vc.match(query_seq, matching_set, topk=int(top_k))

 import soundfile as sf
 import gradio as gr
 import tempfile
+import subprocess
def convert_to_16kHz_mono(input_file: str, output_file: str) -> str:
    """
    Converts an audio file to 16 kHz sample rate and single channel (mono) using ffmpeg.

    Runs ``ffmpeg -y -i input_file -ar 16000 -ac 1 output_file``; ``-y`` lets
    ffmpeg overwrite ``output_file`` if it already exists.

    Parameters:
    input_file (str): Path to the input audio file.
    output_file (str): Path to the output WAV file.

    Returns:
    str: ``output_file``, returned only after ffmpeg exited successfully.

    Raises:
    RuntimeError: If the ffmpeg executable cannot be launched, or if ffmpeg
        exits with a non-zero status. The original exception is chained as
        ``__cause__``.
    """
    command = ['ffmpeg', '-y', '-i', input_file, '-ar', '16000', '-ac', '1', output_file]
    try:
        # check=True turns a non-zero ffmpeg exit status into CalledProcessError.
        subprocess.run(command, check=True)
    except FileNotFoundError as e:
        # Raised by subprocess when the 'ffmpeg' binary is not on PATH.
        raise RuntimeError(
            "ffmpeg executable not found; install ffmpeg to convert audio files"
        ) from e
    except subprocess.CalledProcessError as e:
        print(f"An error occurred during conversion: {e}")
        # Fail here instead of returning None: callers use the return value
        # directly as a file path, so a silent None only defers the crash.
        raise RuntimeError(
            f"ffmpeg failed to convert {input_file!r} to 16 kHz mono"
        ) from e
    print(f"Conversion complete: {output_file}")
    return output_file
def create_temp_wav_file() -> str:
    """
    Creates an empty temporary file with a ``.wav`` suffix and returns its path.

    The file is created with ``delete=False``, so it stays on disk after this
    function returns; the caller is responsible for removing it when done.

    Returns:
    str: Path of the newly created temporary file.
    """
    # The with-block closes the handle before returning, so no file descriptor
    # is left open and another process (e.g. ffmpeg) can write to the path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        return temp_file.name
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device=device)
 def convert_voice(src_wav_path:str, ref_wav_paths, top_k:int):
+    tmp_src_wav_path = create_temp_wav_file()
+    tmp_ref_wav_path = create_temp_wav_file()
+    src_wav_path = convert_to_16kHz_mono(src_wav_path, tmp_src_wav_path)
+    ref_wav_paths = convert_to_16kHz_mono(ref_wav_paths, tmp_ref_wav_path)
     query_seq = knn_vc.get_features(src_wav_path)
     matching_set = knn_vc.get_matching_set([ref_wav_paths])
     out_wav = knn_vc.match(query_seq, matching_set, topk=int(top_k))