Spaces:

Bils
/

AIPromoStudio

Running on Zero

App Files Files Community

Bils committed on Feb 3

Commit

d3df06a

verified ·

1 Parent(s): a8a7982

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -52

app.py CHANGED Viewed

@@ -1,6 +1,13 @@
-import gradio as gr
 import os
 import torch
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
@@ -8,12 +15,6 @@ from transformers import (
     AutoProcessor,
     MusicgenForConditionalGeneration,
 )
-from scipy.io.wavfile import write
-from pydub import AudioSegment
-from dotenv import load_dotenv
-import tempfile
-import spaces
 # Coqui TTS
 from TTS.api import TTS
@@ -99,7 +100,7 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
             f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
             "1. A concise voice-over script. Prefix this section with 'Voice-Over Script:'.\n"
             "2. Suggestions for sound design. Prefix this section with 'Sound Design Suggestions:'.\n"
-            "3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"
@@ -198,7 +199,7 @@ def generate_music(prompt: str, audio_length: int):
         audio_data = outputs[0, 0].cpu().numpy()
         normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
-        output_path = f"{tempfile.gettempdir()}/musicgen_large_generated_music.wav"
         write(output_path, 44100, normalized_audio)
         return output_path
@@ -229,26 +230,21 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
         voice_len = len(voice)  # in milliseconds
         music_len = len(music)  # in milliseconds
-        # 1) If the music is shorter than the voice, loop it:
         if music_len < voice_len:
             looped_music = AudioSegment.empty()
-            # Keep appending until we exceed voice length
             while len(looped_music) < voice_len:
                 looped_music += music
             music = looped_music
-        # 2) If the music is longer than the voice, truncate it:
         if len(music) > voice_len:
             music = music[:voice_len]
-        # Now music and voice are the same length
         if ducking:
-            # Step 1: Reduce music dB while voice is playing
             ducked_music = music - duck_level
-            # Step 2: Overlay voice on top of ducked music
             final_audio = ducked_music.overlay(voice)
         else:
-            # No ducking, just overlay
             final_audio = music.overlay(voice)
         output_path = os.path.join(tempfile.gettempdir(), "blended_output.wav")
@@ -260,32 +256,73 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
 # ---------------------------------------------------------------------
-# Gradio Interface
 # ---------------------------------------------------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("""
-# 🎧 AI Promo Studio
-Welcome to **AI Promo Studio**, your all-in-one solution for creating professional, engaging audio promos with minimal effort!
-This next-generation platform uses powerful AI models to handle:
-- **Script Generation**: Craft concise and impactful copy with LLaMA.
-- **Voice Synthesis**: Convert text into natural-sounding voice-overs using Coqui TTS.
-- **Music Production**: Generate custom music tracks with MusicGen Large for sound bed.
-- **Seamless Blending**: Easily combine voice and music—loop or trim tracks to match your desired promo length, with optional ducking to keep the voice front and center.
-Whether you’re a radio producer, podcaster, or content creator, **AI Promo Studio** streamlines your entire production pipeline—cutting hours of manual editing down to a few clicks.
-""")
     with gr.Tabs():
         # Step 1: Generate Script
-        with gr.Tab("Step 1: Generate Script"):
             with gr.Row():
                 user_prompt = gr.Textbox(
                     label="Promo Idea",
                     placeholder="E.g., A 30-second promo for a morning show...",
                     lines=2
                 )
                 llama_model_id = gr.Textbox(
                     label="LLaMA Model ID",
                     value="meta-llama/Meta-Llama-3-8B-Instruct",
@@ -298,8 +335,7 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
                     step=15,
                     value=30
                 )
-            generate_script_button = gr.Button("Generate Script")
             script_output = gr.Textbox(label="Generated Voice-Over Script", lines=5, interactive=False)
             sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3, interactive=False)
             music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3, interactive=False)
@@ -311,8 +347,8 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
             )
         # Step 2: Generate Voice
-        with gr.Tab("Step 2: Generate Voice"):
-            gr.Markdown("Generate the voice-over using a Coqui TTS model.")
             selected_tts_model = gr.Dropdown(
                 label="TTS Model",
                 choices=[
@@ -323,7 +359,7 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
                 value="tts_models/en/ljspeech/tacotron2-DDC",
                 multiselect=False
             )
-            generate_voice_button = gr.Button("Generate Voice-Over")
             voice_audio_output = gr.Audio(label="Voice-Over (WAV)", type="filepath")
             generate_voice_button.click(
@@ -332,18 +368,18 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
                 outputs=voice_audio_output,
             )
-        # Step 3: Generate Music (MusicGen Large)
-        with gr.Tab("Step 3: Generate Music"):
-            gr.Markdown("Generate a music track with the **MusicGen Large** model.")
             audio_length = gr.Slider(
                 label="Music Length (tokens)",
                 minimum=128,
                 maximum=1024,
                 step=64,
                 value=512,
-                info="Increase tokens for longer audio, but be mindful of inference time."
             )
-            generate_music_button = gr.Button("Generate Music")
             music_output = gr.Audio(label="Generated Music (WAV)", type="filepath")
             generate_music_button.click(
@@ -352,9 +388,9 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
                 outputs=[music_output],
             )
-        # Step 4: Blend Audio (Loop/Trim + Ducking)
-        with gr.Tab("Step 4: Blend Audio"):
-            gr.Markdown("**Music** will be looped or trimmed to match **Voice** duration, then optionally ducked.")
             ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
             duck_level_slider = gr.Slider(
                 label="Ducking Level (dB attenuation)",
@@ -363,7 +399,7 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
                 step=1,
                 value=10
             )
-            blend_button = gr.Button("Blend Voice + Music")
             blended_output = gr.Audio(label="Final Blended Output (WAV)", type="filepath")
             blend_button.click(
@@ -374,17 +410,21 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
     # Footer
     gr.Markdown("""
-    <hr>
-    <p style="text-align: center; font-size: 0.9em;">
-        Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
-    </p>
     """)
     # Visitor Badge
     gr.HTML("""
-    <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">
-        <img src="https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold&countColor=%23263759" />
-    </a>
     """)
 demo.launch(debug=True)

 import os
 import torch
+import tempfile
+from scipy.io.wavfile import write
+from pydub import AudioSegment
+from dotenv import load_dotenv
+import spaces
+import gradio as gr
+# Transformers & Models
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
     AutoProcessor,
     MusicgenForConditionalGeneration,
 )
 # Coqui TTS
 from TTS.api import TTS
             f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
             "1. A concise voice-over script. Prefix this section with 'Voice-Over Script:'.\n"
             "2. Suggestions for sound design. Prefix this section with 'Sound Design Suggestions:'.\n"
+            "3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"
         audio_data = outputs[0, 0].cpu().numpy()
         normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
+        output_path = os.path.join(tempfile.gettempdir(), "musicgen_large_generated_music.wav")
         write(output_path, 44100, normalized_audio)
         return output_path
         voice_len = len(voice)  # in milliseconds
         music_len = len(music)  # in milliseconds
+        # Loop music if it's shorter than voice
         if music_len < voice_len:
             looped_music = AudioSegment.empty()
             while len(looped_music) < voice_len:
                 looped_music += music
             music = looped_music
+        # Trim music if it's longer than voice
         if len(music) > voice_len:
             music = music[:voice_len]
         if ducking:
             ducked_music = music - duck_level
             final_audio = ducked_music.overlay(voice)
         else:
             final_audio = music.overlay(voice)
         output_path = os.path.join(tempfile.gettempdir(), "blended_output.wav")
 # ---------------------------------------------------------------------
+# Gradio Interface with Enhanced UI
 # ---------------------------------------------------------------------
+with gr.Blocks(css="""
+    /* Global Styles */
+    body {
+        background: linear-gradient(135deg, #1d1f21, #3a3d41);
+        color: #f0f0f0;
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    }
+    .header {
+        text-align: center;
+        padding: 2rem 1rem;
+        background: linear-gradient(90deg, #6a11cb, #2575fc);
+        border-radius: 0 0 20px 20px;
+        margin-bottom: 2rem;
+    }
+    .header h1 {
+        margin: 0;
+        font-size: 2.5rem;
+    }
+    .header p {
+        font-size: 1.2rem;
+    }
+    .gradio-container {
+        background: #2e2e2e;
+        border-radius: 10px;
+        padding: 1rem;
+    }
+    .tab-title {
+        font-size: 1.1rem;
+        font-weight: bold;
+    }
+    .footer {
+        text-align: center;
+        font-size: 0.9em;
+        margin-top: 2rem;
+        padding: 1rem;
+        color: #cccccc;
+    }
+""") as demo:
+    # Custom Header
+    with gr.Row(elem_classes="header"):
+        gr.Markdown("""
+        <h1>🎧 AI Promo Studio</h1>
+        <p>Your all-in-one AI solution for crafting engaging audio promos.</p>
+        """)
+    gr.Markdown("""
+    Welcome to **AI Promo Studio**! This platform leverages state-of-the-art AI models to help you generate:
+    - **Script**: Generate a compelling voice-over script with LLaMA.
+    - **Voice Synthesis**: Create natural-sounding voice-overs using Coqui TTS.
+    - **Music Production**: Produce custom music tracks with MusicGen.
+    - **Audio Blending**: Seamlessly blend voice and music with options for ducking.
+    """)
     with gr.Tabs():
         # Step 1: Generate Script
+        with gr.Tab("📝 Script Generation"):
             with gr.Row():
                 user_prompt = gr.Textbox(
                     label="Promo Idea",
                     placeholder="E.g., A 30-second promo for a morning show...",
                     lines=2
                 )
+            with gr.Row():
                 llama_model_id = gr.Textbox(
                     label="LLaMA Model ID",
                     value="meta-llama/Meta-Llama-3-8B-Instruct",
                     step=15,
                     value=30
                 )
+            generate_script_button = gr.Button("Generate Script", variant="primary")
             script_output = gr.Textbox(label="Generated Voice-Over Script", lines=5, interactive=False)
             sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3, interactive=False)
             music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3, interactive=False)
             )
         # Step 2: Generate Voice
+        with gr.Tab("🎤 Voice Synthesis"):
+            gr.Markdown("Generate a natural-sounding voice-over using Coqui TTS.")
             selected_tts_model = gr.Dropdown(
                 label="TTS Model",
                 choices=[
                 value="tts_models/en/ljspeech/tacotron2-DDC",
                 multiselect=False
             )
+            generate_voice_button = gr.Button("Generate Voice-Over", variant="primary")
             voice_audio_output = gr.Audio(label="Voice-Over (WAV)", type="filepath")
             generate_voice_button.click(
                 outputs=voice_audio_output,
             )
+        # Step 3: Generate Music
+        with gr.Tab("🎶 Music Production"):
+            gr.Markdown("Generate a custom music track using the **MusicGen Large** model.")
             audio_length = gr.Slider(
                 label="Music Length (tokens)",
                 minimum=128,
                 maximum=1024,
                 step=64,
                 value=512,
+                info="Increase tokens for longer audio (inference time may vary)."
             )
+            generate_music_button = gr.Button("Generate Music", variant="primary")
             music_output = gr.Audio(label="Generated Music (WAV)", type="filepath")
             generate_music_button.click(
                 outputs=[music_output],
             )
+        # Step 4: Blend Audio
+        with gr.Tab("🎚️ Audio Blending"):
+            gr.Markdown("Blend your voice-over and music track. Music will be looped/truncated to match the voice duration. Enable ducking to lower the music during voice segments.")
             ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
             duck_level_slider = gr.Slider(
                 label="Ducking Level (dB attenuation)",
                 step=1,
                 value=10
             )
+            blend_button = gr.Button("Blend Voice + Music", variant="primary")
             blended_output = gr.Audio(label="Final Blended Output (WAV)", type="filepath")
             blend_button.click(
     # Footer
     gr.Markdown("""
+    <div class="footer">
+        <hr>
+        Created with ❤️ by <a href="https://bilsimaging.com" target="_blank" style="color: #88aaff;">bilsimaging.com</a>
+        <br>
+        <small>AI Promo Studio &copy; 2025</small>
+    </div>
     """)
     # Visitor Badge
     gr.HTML("""
+    <div style="text-align: center; margin-top: 1rem;">
+        <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">
+            <img src="https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold&countColor=%23263759" alt="visitor badge"/>
+        </a>
+    </div>
     """)
 demo.launch(debug=True)