Spaces:
Sleeping
Sleeping
Update generate_audio.py
Browse files
- generate_audio.py +6 -3
generate_audio.py
CHANGED
@@ -10,14 +10,14 @@ from scipy.io import wavfile
|
|
10 |
from pydub import AudioSegment
|
11 |
import io
|
12 |
import ast
|
13 |
-
|
14 |
|
15 |
|
16 |
class TTSGenerator:
|
17 |
"""
|
18 |
A class to generate podcast-style audio from a transcript using ParlerTTS and Bark models.
|
19 |
"""
|
20 |
-
|
21 |
def __init__(self, transcript_file_path):
|
22 |
"""
|
23 |
Initialize the TTS generator with the path to the rewritten transcript file.
|
@@ -53,7 +53,8 @@ class TTSGenerator:
|
|
53 |
"""
|
54 |
with open(self.transcript_file_path, 'rb') as f:
|
55 |
return ast.literal_eval(pickle.load(f))
|
56 |
-
|
|
|
57 |
def generate_speaker1_audio(self, text):
|
58 |
"""
|
59 |
Generate audio for Speaker 1 using ParlerTTS.
|
@@ -71,6 +72,7 @@ class TTSGenerator:
|
|
71 |
audio_arr = generation.cpu().numpy().squeeze()
|
72 |
return audio_arr, self.parler_model.config.sampling_rate
|
73 |
|
|
|
74 |
def generate_speaker2_audio(self, text):
|
75 |
"""
|
76 |
Generate audio for Speaker 2 using Bark.
|
@@ -88,6 +90,7 @@ class TTSGenerator:
|
|
88 |
return audio_arr, self.bark_sampling_rate
|
89 |
|
90 |
@staticmethod
|
|
|
91 |
def numpy_to_audio_segment(audio_arr, sampling_rate):
|
92 |
"""
|
93 |
Convert numpy array to AudioSegment.
|
|
|
10 |
from pydub import AudioSegment
|
11 |
import io
|
12 |
import ast
|
13 |
+
import spaces
|
14 |
|
15 |
|
16 |
class TTSGenerator:
|
17 |
"""
|
18 |
A class to generate podcast-style audio from a transcript using ParlerTTS and Bark models.
|
19 |
"""
|
20 |
+
@spaces.GPU
|
21 |
def __init__(self, transcript_file_path):
|
22 |
"""
|
23 |
Initialize the TTS generator with the path to the rewritten transcript file.
|
|
|
53 |
"""
|
54 |
with open(self.transcript_file_path, 'rb') as f:
|
55 |
return ast.literal_eval(pickle.load(f))
|
56 |
+
|
57 |
+
@spaces.GPU
|
58 |
def generate_speaker1_audio(self, text):
|
59 |
"""
|
60 |
Generate audio for Speaker 1 using ParlerTTS.
|
|
|
72 |
audio_arr = generation.cpu().numpy().squeeze()
|
73 |
return audio_arr, self.parler_model.config.sampling_rate
|
74 |
|
75 |
+
@spaces.GPU
|
76 |
def generate_speaker2_audio(self, text):
|
77 |
"""
|
78 |
Generate audio for Speaker 2 using Bark.
|
|
|
90 |
return audio_arr, self.bark_sampling_rate
|
91 |
|
92 |
@staticmethod
|
93 |
+
@spaces.GPU
|
94 |
def numpy_to_audio_segment(audio_arr, sampling_rate):
|
95 |
"""
|
96 |
Convert numpy array to AudioSegment.
|