LuisVasquezBSC committed on
Commit
a611372
1 Parent(s): 2bbd427

Add conversion to 16k mono

Browse files

The models require audio input sampled at 16 kHz with a single channel (mono).
I am adding utilities to convert any audio file into this format.

Files changed (1) hide show
  1. app.py +31 -0
app.py CHANGED
@@ -5,6 +5,32 @@ from typing import List
5
  import soundfile as sf
6
  import gradio as gr
7
  import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
  knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device=device)
@@ -12,6 +38,11 @@ knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=T
12
 
13
  def convert_voice(src_wav_path:str, ref_wav_paths, top_k:int):
14
 
 
 
 
 
 
15
  query_seq = knn_vc.get_features(src_wav_path)
16
  matching_set = knn_vc.get_matching_set([ref_wav_paths])
17
  out_wav = knn_vc.match(query_seq, matching_set, topk=int(top_k))
 
5
  import soundfile as sf
6
  import gradio as gr
7
  import tempfile
8
+ import subprocess
9
+
10
def convert_to_16kHz_mono(input_file: str, output_file: str):
    """
    Convert an audio file to a 16 kHz, single-channel (mono) file using ffmpeg.

    Parameters:
    input_file (str): Path to the input audio file.
    output_file (str): Path to the output WAV file; overwritten if it exists.

    Returns:
    str or None: ``output_file`` when ffmpeg exits successfully, ``None`` when
    ffmpeg exits with a non-zero status. Callers must check for ``None``
    before using the result as a path.

    Raises:
    FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # -y overwrites the output without prompting; -ar / -ac set the target
    # sample rate and channel count.
    command = ['ffmpeg', '-y', '-i', input_file, '-ar', '16000', '-ac', '1', output_file]
    try:
        # stdin is detached so ffmpeg never waits for, or consumes, input
        # that belongs to the parent process.
        subprocess.run(command, check=True, stdin=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
        print(f"An error occurred during conversion: {e}")
        # Explicit failure signal (previously an implicit fall-through).
        return None
    print(f"Conversion complete: {output_file}")
    return output_file
25
+
26
def create_temp_wav_file():
    """
    Create an empty temporary ``.wav`` file and return its path.

    The file is created with ``delete=False``, so it stays on disk after this
    function returns; the caller is responsible for removing it.

    Returns:
    str: Path of the newly created temporary file.
    """
    # Only the path is needed. Closing the handle on leaving the ``with``
    # block avoids leaking a file descriptor per call and lets another
    # process (e.g. ffmpeg) write to the file on platforms that lock open files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        return temp_file.name
34
 
35
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
  knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device=device)
 
38
 
39
  def convert_voice(src_wav_path:str, ref_wav_paths, top_k:int):
40
 
41
+ tmp_src_wav_path = create_temp_wav_file()
42
+ tmp_ref_wav_path = create_temp_wav_file()
43
+ src_wav_path = convert_to_16kHz_mono(src_wav_path, tmp_src_wav_path)
44
+ ref_wav_paths = convert_to_16kHz_mono(ref_wav_paths, tmp_ref_wav_path)
45
+
46
  query_seq = knn_vc.get_features(src_wav_path)
47
  matching_set = knn_vc.get_matching_set([ref_wav_paths])
48
  out_wav = knn_vc.match(query_seq, matching_set, topk=int(top_k))