TenzinGayche
committed on
Commit
·
4f18e92
1
Parent(s):
94e964f
Update handler.py
Browse files - handler.py +15 -8
handler.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
from typing import Dict, Any,Union
|
2 |
-
import librosa
|
3 |
import tempfile
|
4 |
import numpy as np
|
5 |
import torch
|
@@ -8,6 +7,7 @@ import noisereduce as nr
|
|
8 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
9 |
from num2tib.core import convert
|
10 |
from num2tib.core import convert2text
|
|
|
11 |
import base64
|
12 |
import re
|
13 |
import requests
|
@@ -50,10 +50,6 @@ replacements = [
|
|
50 |
('╗','')
|
51 |
]
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
class EndpointHandler():
|
58 |
def __init__(self, path=""):
|
59 |
# load the model
|
@@ -88,13 +84,24 @@ class EndpointHandler():
|
|
88 |
speaker_embedding = torch.tensor(speaker_embedding)
|
89 |
speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
|
90 |
speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
|
|
|
91 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
|
92 |
temp_wav_path = temp_wav_file.name
|
93 |
-
|
|
|
|
|
94 |
with open(temp_wav_path, "rb") as wav_file:
|
95 |
audio_base64 = base64.b64encode(wav_file.read()).decode("utf-8")
|
|
|
|
|
96 |
os.remove(temp_wav_path)
|
|
|
97 |
return {
|
98 |
"sample_rate": 16000,
|
99 |
-
"audio_base64": audio_base64,
|
100 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from typing import Dict, Any,Union
|
|
|
2 |
import tempfile
|
3 |
import numpy as np
|
4 |
import torch
|
|
|
7 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
8 |
from num2tib.core import convert
|
9 |
from num2tib.core import convert2text
|
10 |
+
import soundfile as sf
|
11 |
import base64
|
12 |
import re
|
13 |
import requests
|
|
|
50 |
('╗','')
|
51 |
]
|
52 |
|
|
|
|
|
|
|
|
|
53 |
class EndpointHandler():
|
54 |
def __init__(self, path=""):
|
55 |
# load the model
|
|
|
84 |
speaker_embedding = torch.tensor(speaker_embedding)
|
85 |
speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
|
86 |
speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
|
87 |
+
# Create a unique temporary WAV file
|
88 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
|
89 |
temp_wav_path = temp_wav_file.name
|
90 |
+
sf.write(temp_wav_path, speech.numpy(), 16000, 'PCM_24') # Use sf.write to write the WAV file
|
91 |
+
|
92 |
+
# Read the WAV file and encode it as base64
|
93 |
with open(temp_wav_path, "rb") as wav_file:
|
94 |
audio_base64 = base64.b64encode(wav_file.read()).decode("utf-8")
|
95 |
+
|
96 |
+
# Clean up the temporary WAV file
|
97 |
os.remove(temp_wav_path)
|
98 |
+
|
99 |
return {
|
100 |
"sample_rate": 16000,
|
101 |
+
"audio_base64": audio_base64, # Base64-encoded audio data
|
102 |
+
}
|
103 |
+
|
104 |
+
|
105 |
+
|
106 |
+
|
107 |
+
|