TenzinGayche committed on
Commit
0db7ad9
·
1 Parent(s): 5f5648d

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +27 -0
handler.py CHANGED
@@ -12,6 +12,24 @@ import base64
12
  import re
13
  import requests
14
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  converter = pyewts.pyewts()
16
  def download_file(url, destination):
17
  response = requests.get(url)
@@ -84,6 +102,15 @@ class EndpointHandler():
84
  speaker_embedding = torch.tensor(speaker_embedding)
85
  speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
86
  speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
 
 
 
 
 
 
 
 
 
87
  # Create a unique temporary WAV file
88
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
89
  temp_wav_path = temp_wav_file.name
 
12
  import re
13
  import requests
14
  import os
15
+ from pydub import AudioSegment
16
def increase_volume_without_distortion(audio_data, sample_rate, target_dBFS):
    """Normalize mono audio to ``target_dBFS`` and return 16-bit PCM samples.

    Args:
        audio_data: NumPy array of audio samples. Floating-point input is
            treated as normalized audio in [-1.0, 1.0] and converted to
            16-bit PCM first (assumption: that is what the caller's
            noise-reduction step produces -- TODO confirm). Any other dtype
            is handed to pydub unchanged.
        sample_rate: Sampling rate of ``audio_data`` in Hz.
        target_dBFS: Desired average loudness in dBFS (for example ``-20.0``).

    Returns:
        A one-dimensional ``numpy.ndarray`` of dtype ``int16`` holding the
        gain-adjusted samples. Empty or completely silent input is returned
        as int16 without any gain applied.
    """
    samples = np.asarray(audio_data)

    if np.issubdtype(samples.dtype, np.floating):
        # pydub interprets the buffer as integer PCM of ``sample_width``
        # bytes. Passing raw float bytes would reinterpret the IEEE-754 bit
        # patterns as integers and produce noise, so convert to int16 first.
        bounded = np.clip(np.nan_to_num(samples), -1.0, 1.0)
        samples = np.round(bounded * 32767.0).astype(np.int16)

    # Silence has no measurable loudness (pydub reports -inf dBFS), which
    # would make the required gain infinite. There is nothing to amplify,
    # so return the samples untouched.
    if samples.size == 0 or not samples.any():
        return samples.astype(np.int16).ravel()

    # Create an AudioSegment from the raw PCM bytes.
    audio_segment = AudioSegment(
        samples.tobytes(),
        frame_rate=sample_rate,
        sample_width=samples.dtype.itemsize,
        channels=1,  # or 2 for stereo
    )

    # Shift the average level so that it lands on the requested target.
    change_in_dBFS = target_dBFS - audio_segment.dBFS
    normalized_audio = audio_segment.apply_gain(change_in_dBFS)

    # Convert the AudioSegment back to a NumPy array of int16 samples.
    normalized_audio_data = np.array(
        normalized_audio.get_array_of_samples()
    ).astype(np.int16)

    return normalized_audio_data
33
  converter = pyewts.pyewts()
34
  def download_file(url, destination):
35
  response = requests.get(url)
 
102
  speaker_embedding = torch.tensor(speaker_embedding)
103
  speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
104
  speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
105
+ if isinstance(speech, torch.Tensor):
106
+
107
+
108
+ speech = speech.numpy()
109
+
110
+ # Increase volume without distortion
111
+ target_dBFS = -20.0 # Adjust the value according to your requirement
112
+ speech = increase_volume_without_distortion(speech, 16000, target_dBFS)
113
+
114
  # Create a unique temporary WAV file
115
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
116
  temp_wav_path = temp_wav_file.name