TenzinGayche committed on
Commit
4f18e92
·
1 Parent(s): 94e964f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +15 -8
handler.py CHANGED
@@ -1,5 +1,4 @@
1
  from typing import Dict, Any,Union
2
- import librosa
3
  import tempfile
4
  import numpy as np
5
  import torch
@@ -8,6 +7,7 @@ import noisereduce as nr
8
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
9
  from num2tib.core import convert
10
  from num2tib.core import convert2text
 
11
  import base64
12
  import re
13
  import requests
@@ -50,10 +50,6 @@ replacements = [
50
  ('╗','')
51
  ]
52
 
53
-
54
-
55
-
56
-
57
  class EndpointHandler():
58
  def __init__(self, path=""):
59
  # load the model
@@ -88,13 +84,24 @@ class EndpointHandler():
88
  speaker_embedding = torch.tensor(speaker_embedding)
89
  speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
90
  speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
 
91
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
92
  temp_wav_path = temp_wav_file.name
93
- librosa.output.write_wav(temp_wav_path, speech.numpy(), sr=16000)
 
 
94
  with open(temp_wav_path, "rb") as wav_file:
95
  audio_base64 = base64.b64encode(wav_file.read()).decode("utf-8")
 
 
96
  os.remove(temp_wav_path)
 
97
  return {
98
  "sample_rate": 16000,
99
- "audio_base64": audio_base64,
100
- }
 
 
 
 
 
 
1
  from typing import Dict, Any,Union
 
2
  import tempfile
3
  import numpy as np
4
  import torch
 
7
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
8
  from num2tib.core import convert
9
  from num2tib.core import convert2text
10
+ import soundfile as sf
11
  import base64
12
  import re
13
  import requests
 
50
  ('╗','')
51
  ]
52
 
 
 
 
 
53
  class EndpointHandler():
54
  def __init__(self, path=""):
55
  # load the model
 
84
  speaker_embedding = torch.tensor(speaker_embedding)
85
  speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
86
  speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
87
+ # Create a unique temporary WAV file
88
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
89
  temp_wav_path = temp_wav_file.name
90
+ sf.write(temp_wav_path, speech.numpy(), 16000, 'PCM_24') # Use sf.write to write the WAV file
91
+
92
+ # Read the WAV file and encode it as base64
93
  with open(temp_wav_path, "rb") as wav_file:
94
  audio_base64 = base64.b64encode(wav_file.read()).decode("utf-8")
95
+
96
+ # Clean up the temporary WAV file
97
  os.remove(temp_wav_path)
98
+
99
  return {
100
  "sample_rate": 16000,
101
+ "audio_base64": audio_base64, # Base64-encoded audio data
102
+ }
103
+
104
+
105
+
106
+
107
+