yasserrmd committed on
Commit
8797420
·
verified ·
1 Parent(s): fce33e5

Update generate_audio.py

Browse files
Files changed (1) hide show
  1. generate_audio.py +8 -7
generate_audio.py CHANGED
@@ -1,6 +1,6 @@
1
  # generate_audio.py
2
 
3
- import spaces
4
  import pickle
5
  import torch
6
  import numpy as np
@@ -18,7 +18,7 @@ class TTSGenerator:
18
  """
19
  A class to generate podcast-style audio from a transcript using ParlerTTS and Bark models.
20
  """
21
- @spaces.GPU
22
  def __init__(self, transcript_file_path,output_audio_path):
23
  """
24
  Initialize the TTS generator with the path to the rewritten transcript file.
@@ -48,7 +48,7 @@ class TTSGenerator:
48
  # self.bark_sampling_rate = 24000
49
  # self.voice_preset = "v2/en_speaker_6"
50
 
51
- @spaces.GPU
52
  def load_transcript(self):
53
  """
54
  Loads the rewritten transcript from the specified file.
@@ -59,7 +59,7 @@ class TTSGenerator:
59
  with open(self.transcript_file_path, 'rb') as f:
60
  return ast.literal_eval(pickle.load(f))
61
 
62
- @spaces.GPU(duration=30)
63
  def generate_speaker1_audio(self, text):
64
  """
65
  Generate audio for Speaker 1 using ParlerTTS.
@@ -92,7 +92,7 @@ class TTSGenerator:
92
  audio_arr = generation.cpu().numpy().squeeze()
93
  return audio_arr, self.parler_model.config.sampling_rate
94
 
95
- @spaces.GPU(duration=30)
96
  def generate_speaker2_audio(self, text):
97
  """
98
  Generate audio for Speaker 2 using Bark.
@@ -130,8 +130,9 @@ class TTSGenerator:
130
  # audio_arr = speech_output[0].cpu().numpy()
131
  return audio_arr, self.parler_model.config.sampling_rate
132
 
 
 
133
  @staticmethod
134
- @spaces.GPU
135
  def numpy_to_audio_segment(audio_arr, sampling_rate):
136
  """
137
  Convert numpy array to AudioSegment.
@@ -149,7 +150,7 @@ class TTSGenerator:
149
  byte_io.seek(0)
150
  return AudioSegment.from_wav(byte_io)
151
 
152
- @spaces.GPU(duration=300)
153
  def generate_audio(self):
154
  """
155
  Converts the transcript into audio and saves it to a file.
 
1
  # generate_audio.py
2
 
3
+ #import spaces
4
  import pickle
5
  import torch
6
  import numpy as np
 
18
  """
19
  A class to generate podcast-style audio from a transcript using ParlerTTS and Bark models.
20
  """
21
+ #@spaces.GPU
22
  def __init__(self, transcript_file_path,output_audio_path):
23
  """
24
  Initialize the TTS generator with the path to the rewritten transcript file.
 
48
  # self.bark_sampling_rate = 24000
49
  # self.voice_preset = "v2/en_speaker_6"
50
 
51
+ #@spaces.GPU
52
  def load_transcript(self):
53
  """
54
  Loads the rewritten transcript from the specified file.
 
59
  with open(self.transcript_file_path, 'rb') as f:
60
  return ast.literal_eval(pickle.load(f))
61
 
62
+ #@spaces.GPU(duration=30)
63
  def generate_speaker1_audio(self, text):
64
  """
65
  Generate audio for Speaker 1 using ParlerTTS.
 
92
  audio_arr = generation.cpu().numpy().squeeze()
93
  return audio_arr, self.parler_model.config.sampling_rate
94
 
95
+ #@spaces.GPU(duration=30)
96
  def generate_speaker2_audio(self, text):
97
  """
98
  Generate audio for Speaker 2 using Bark.
 
130
  # audio_arr = speech_output[0].cpu().numpy()
131
  return audio_arr, self.parler_model.config.sampling_rate
132
 
133
+
134
+ #@spaces.GPU
135
  @staticmethod
 
136
  def numpy_to_audio_segment(audio_arr, sampling_rate):
137
  """
138
  Convert numpy array to AudioSegment.
 
150
  byte_io.seek(0)
151
  return AudioSegment.from_wav(byte_io)
152
 
153
+ #@spaces.GPU(duration=300)
154
  def generate_audio(self):
155
  """
156
  Converts the transcript into audio and saves it to a file.