Spaces:
Sleeping
Sleeping
TheM1N9
committed on
Commit
·
b4093cc
1
Parent(s):
0104e23
Refactor audio generation in NotebookMg to write directly to output file, eliminating the need for intermediate AudioSegment. This change improves performance and simplifies the audio processing workflow.
Browse files
main.py
CHANGED
@@ -227,50 +227,45 @@ class NotebookMg:
|
|
227 |
|
228 |
def generate_audio(self, speaker_lines: List[Tuple[str, str]], output_path: str):
|
229 |
"""Step 4: Generate and stitch audio using ElevenLabs"""
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
pause = AudioSegment.silent(duration=300) # 300ms pause
|
270 |
-
combined_audio += audio_segment + pause
|
271 |
-
|
272 |
-
# Export final audio
|
273 |
-
combined_audio.export(output_path, format="mp3")
|
274 |
|
275 |
|
276 |
# def main():
|
|
|
227 |
|
228 |
def generate_audio(self, speaker_lines: List[Tuple[str, str]], output_path: str):
    """Step 4: Generate and stitch audio using ElevenLabs.

    Each ``(speaker, text)`` pair is synthesized in order and the returned
    MP3 data is appended to ``output_path``, so the finished file is the
    concatenation of every line's audio.

    Args:
        speaker_lines: Ordered ``(speaker, text)`` pairs. The speaker
            ``"Akshara"`` is rendered with ``self.Akshara_voice_id``; any
            other speaker is rendered with ``self.Tharun_voice_id``.
        output_path: Destination file. It is opened in binary write mode,
            so an existing file at this path is truncated.
    """
    # Voices whose settings were already pushed during this call. The
    # settings are constants, so one update per voice is enough; repeating
    # it for every line only adds a network round trip per line.
    configured_voices = set()
    last_index = len(speaker_lines) - 1

    # Open the output file in binary write mode
    with open(output_path, "wb") as outfile:
        for i, (speaker, line) in enumerate(speaker_lines):
            # Select appropriate voice ID
            voice_id = (
                self.Akshara_voice_id
                if speaker == "Akshara"
                else self.Tharun_voice_id
            )

            # Update voice settings with required parameters (once per voice)
            if voice_id not in configured_voices:
                self.eleven_client.voices.edit_settings(
                    voice_id=voice_id,
                    request=VoiceSettings(
                        stability=0.5,
                        similarity_boost=0.75,
                        use_speaker_boost=True,
                    ),
                )
                configured_voices.add(voice_id)

            # Neighbouring lines are passed as context for this line;
            # None marks the start/end of the transcript.
            previous_text = speaker_lines[i - 1][1] if i > 0 else None
            next_text = speaker_lines[i + 1][1] if i < last_index else None

            # Generate audio for this line
            audio_data = self.eleven_client.text_to_speech.convert(
                voice_id=voice_id,
                output_format="mp3_44100_128",
                text=line,
                model_id="eleven_multilingual_v2",
                previous_text=previous_text,
                next_text=next_text,
            )

            # convert() yields the clip in chunks; join them first so a
            # failure mid-stream never leaves half a clip in the file.
            outfile.write(b"".join(audio_data))
|
|
|
|
|
|
|
|
|
|
|
269 |
|
270 |
|
271 |
# def main():
|