Tonic committed on
Commit
9c5433a
1 Parent(s): 9c4ca64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -37,16 +37,22 @@ def whisper_speech_demo(text, lang, speaker_audio, mix_lang, mix_text):
37
  resample_audio = resampler(newsr=24000)
38
  audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
39
 
40
- # Normalize
41
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
42
  tmp_file_name = tmp_file.name
43
  audio_np = audio_data_resampled.numpy() # Convert to numpy array
44
-
 
45
  if audio_np.max() > 1.0 or audio_np.min() < -1.0:
46
  audio_np = audio_np / np.max(np.abs(audio_np))
47
-
48
- sf.write(tmp_file_name, audio_np, 24000, 'PCM_24') # Write with a sample rate of 24000 Hz
49
-
 
 
 
 
 
50
  return tmp_file_name
51
 
52
  with gr.Blocks() as demo:
 
37
  resample_audio = resampler(newsr=24000)
38
  audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
39
 
40
+ # Normalize and write to a WAV file
41
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
42
  tmp_file_name = tmp_file.name
43
  audio_np = audio_data_resampled.numpy() # Convert to numpy array
44
+
45
+ # Normalize if necessary
46
  if audio_np.max() > 1.0 or audio_np.min() < -1.0:
47
  audio_np = audio_np / np.max(np.abs(audio_np))
48
+
49
+ # Ensure the audio data is 2D (num_samples, num_channels)
50
+ if audio_np.ndim == 1:
51
+ audio_np = np.expand_dims(audio_np, axis=1)
52
+
53
+ # Write the file
54
+ sf.write(tmp_file_name, audio_np, 24000)
55
+
56
  return tmp_file_name
57
 
58
  with gr.Blocks() as demo: