TheM1N9 committed on
Commit
b4093cc
·
1 Parent(s): 0104e23

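Refactor audio generation in NotebookMg to write each generated MP3 segment directly to the output file, eliminating the need for an intermediate AudioSegment. This change improves performance and simplifies the audio processing workflow. Note that the 300 ms silent pause previously appended after each segment is no longer added.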

Browse files
Files changed (1) hide show
  1. main.py +39 -44
main.py CHANGED
@@ -227,50 +227,45 @@ class NotebookMg:
227
 
228
  def generate_audio(self, speaker_lines: List[Tuple[str, str]], output_path: str):
229
  """Step 4: Generate and stitch audio using ElevenLabs"""
230
- combined_audio = AudioSegment.empty()
231
-
232
- for i, (speaker, line) in enumerate(speaker_lines):
233
- # Select appropriate voice ID
234
- voice_id = (
235
- self.Akshara_voice_id if speaker == "Akshara" else self.Tharun_voice_id
236
- )
237
-
238
- # Get previous and next text for context
239
- previous_text = speaker_lines[i - 1][1] if i > 0 else None
240
- next_text = speaker_lines[i + 1][1] if i < len(speaker_lines) - 1 else None
241
-
242
- # Update voice settings with required parameters
243
- self.eleven_client.voices.edit_settings(
244
- voice_id=voice_id,
245
- request=VoiceSettings(
246
- stability=0.5, # Add stability parameter (0-1)
247
- similarity_boost=0.75, # Add similarity_boost parameter (0-1)
248
- use_speaker_boost=True,
249
- ),
250
- )
251
-
252
- # Generate audio for this line
253
- audio_data = self.eleven_client.text_to_speech.convert(
254
- voice_id=voice_id,
255
- output_format="mp3_44100_128",
256
- text=line,
257
- model_id="eleven_multilingual_v2",
258
- previous_text=previous_text,
259
- next_text=next_text,
260
- )
261
-
262
- # Convert audio data to AudioSegment
263
- audio_bytes = b"".join(audio_data)
264
- audio_segment = AudioSegment.from_file(
265
- io.BytesIO(audio_bytes), format="mp3"
266
- )
267
-
268
- # Add small pause between segments
269
- pause = AudioSegment.silent(duration=300) # 300ms pause
270
- combined_audio += audio_segment + pause
271
-
272
- # Export final audio
273
- combined_audio.export(output_path, format="mp3")
274
 
275
 
276
  # def main():
 
227
 
228
  def generate_audio(self, speaker_lines: List[Tuple[str, str]], output_path: str):
229
  """Step 4: Generate and stitch audio using ElevenLabs"""
230
+ # Open the output file in binary write mode
231
+ with open(output_path, "wb") as outfile:
232
+ for i, (speaker, line) in enumerate(speaker_lines):
233
+ # Select appropriate voice ID
234
+ voice_id = (
235
+ self.Akshara_voice_id
236
+ if speaker == "Akshara"
237
+ else self.Tharun_voice_id
238
+ )
239
+
240
+ # Get previous and next text for context
241
+ previous_text = speaker_lines[i - 1][1] if i > 0 else None
242
+ next_text = (
243
+ speaker_lines[i + 1][1] if i < len(speaker_lines) - 1 else None
244
+ )
245
+
246
+ # Update voice settings with required parameters
247
+ self.eleven_client.voices.edit_settings(
248
+ voice_id=voice_id,
249
+ request=VoiceSettings(
250
+ stability=0.5,
251
+ similarity_boost=0.75,
252
+ use_speaker_boost=True,
253
+ ),
254
+ )
255
+
256
+ # Generate audio for this line
257
+ audio_data = self.eleven_client.text_to_speech.convert(
258
+ voice_id=voice_id,
259
+ output_format="mp3_44100_128",
260
+ text=line,
261
+ model_id="eleven_multilingual_v2",
262
+ previous_text=previous_text,
263
+ next_text=next_text,
264
+ )
265
+
266
+ # Write the audio data directly to the file
267
+ audio_bytes = b"".join(audio_data)
268
+ outfile.write(audio_bytes)
 
 
 
 
 
269
 
270
 
271
  # def main():