Spaces:

m1n9k7
/

NotebookMg

Sleeping

App Files Files Community

TheM1N9 committed on Dec 24, 2024

Commit

b4093cc

1 Parent(s): 0104e23

Refactor audio generation in NotebookMg to write directly to output file, eliminating the need for intermediate AudioSegment. This change improves performance and simplifies the audio processing workflow.

Browse files

Files changed (1) hide show

main.py +39 -44

main.py CHANGED Viewed

@@ -227,50 +227,45 @@ class NotebookMg:
     def generate_audio(self, speaker_lines: List[Tuple[str, str]], output_path: str):
         """Step 4: Generate and stitch audio using ElevenLabs"""
-        combined_audio = AudioSegment.empty()
-        for i, (speaker, line) in enumerate(speaker_lines):
-            # Select appropriate voice ID
-            voice_id = (
-                self.Akshara_voice_id if speaker == "Akshara" else self.Tharun_voice_id
-            )
-            # Get previous and next text for context
-            previous_text = speaker_lines[i - 1][1] if i > 0 else None
-            next_text = speaker_lines[i + 1][1] if i < len(speaker_lines) - 1 else None
-            # Update voice settings with required parameters
-            self.eleven_client.voices.edit_settings(
-                voice_id=voice_id,
-                request=VoiceSettings(
-                    stability=0.5,  # Add stability parameter (0-1)
-                    similarity_boost=0.75,  # Add similarity_boost parameter (0-1)
-                    use_speaker_boost=True,
-                ),
-            )
-            # Generate audio for this line
-            audio_data = self.eleven_client.text_to_speech.convert(
-                voice_id=voice_id,
-                output_format="mp3_44100_128",
-                text=line,
-                model_id="eleven_multilingual_v2",
-                previous_text=previous_text,
-                next_text=next_text,
-            )
-            # Convert audio data to AudioSegment
-            audio_bytes = b"".join(audio_data)
-            audio_segment = AudioSegment.from_file(
-                io.BytesIO(audio_bytes), format="mp3"
-            )
-            # Add small pause between segments
-            pause = AudioSegment.silent(duration=300)  # 300ms pause
-            combined_audio += audio_segment + pause
-        # Export final audio
-        combined_audio.export(output_path, format="mp3")
 # def main():

     def generate_audio(self, speaker_lines: List[Tuple[str, str]], output_path: str):
         """Step 4: Generate and stitch audio using ElevenLabs"""
+        # Open the output file in binary write mode
+        with open(output_path, "wb") as outfile:
+            for i, (speaker, line) in enumerate(speaker_lines):
+                # Select appropriate voice ID
+                voice_id = (
+                    self.Akshara_voice_id
+                    if speaker == "Akshara"
+                    else self.Tharun_voice_id
+                )
+                # Get previous and next text for context
+                previous_text = speaker_lines[i - 1][1] if i > 0 else None
+                next_text = (
+                    speaker_lines[i + 1][1] if i < len(speaker_lines) - 1 else None
+                )
+                # Update voice settings with required parameters
+                self.eleven_client.voices.edit_settings(
+                    voice_id=voice_id,
+                    request=VoiceSettings(
+                        stability=0.5,
+                        similarity_boost=0.75,
+                        use_speaker_boost=True,
+                    ),
+                )
+                # Generate audio for this line
+                audio_data = self.eleven_client.text_to_speech.convert(
+                    voice_id=voice_id,
+                    output_format="mp3_44100_128",
+                    text=line,
+                    model_id="eleven_multilingual_v2",
+                    previous_text=previous_text,
+                    next_text=next_text,
+                )
+                # Write the audio data directly to the file
+                audio_bytes = b"".join(audio_data)
+                outfile.write(audio_bytes)
 # def main():