Bils committed on
Commit
d3df06a
·
verified ·
1 Parent(s): a8a7982

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -52
app.py CHANGED
@@ -1,6 +1,13 @@
1
- import gradio as gr
2
  import os
3
  import torch
 
 
 
 
 
 
 
 
4
  from transformers import (
5
  AutoTokenizer,
6
  AutoModelForCausalLM,
@@ -8,12 +15,6 @@ from transformers import (
8
  AutoProcessor,
9
  MusicgenForConditionalGeneration,
10
  )
11
- from scipy.io.wavfile import write
12
- from pydub import AudioSegment
13
- from dotenv import load_dotenv
14
- import tempfile
15
- import spaces
16
-
17
  # Coqui TTS
18
  from TTS.api import TTS
19
 
@@ -99,7 +100,7 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
99
  f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
100
  "1. A concise voice-over script. Prefix this section with 'Voice-Over Script:'.\n"
101
  "2. Suggestions for sound design. Prefix this section with 'Sound Design Suggestions:'.\n"
102
- "3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
103
  )
104
  combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"
105
 
@@ -198,7 +199,7 @@ def generate_music(prompt: str, audio_length: int):
198
  audio_data = outputs[0, 0].cpu().numpy()
199
  normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
200
 
201
- output_path = f"{tempfile.gettempdir()}/musicgen_large_generated_music.wav"
202
  write(output_path, 44100, normalized_audio)
203
 
204
  return output_path
@@ -229,26 +230,21 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
229
  voice_len = len(voice) # in milliseconds
230
  music_len = len(music) # in milliseconds
231
 
232
- # 1) If the music is shorter than the voice, loop it:
233
  if music_len < voice_len:
234
  looped_music = AudioSegment.empty()
235
- # Keep appending until we exceed voice length
236
  while len(looped_music) < voice_len:
237
  looped_music += music
238
  music = looped_music
239
 
240
- # 2) If the music is longer than the voice, truncate it:
241
  if len(music) > voice_len:
242
  music = music[:voice_len]
243
 
244
- # Now music and voice are the same length
245
  if ducking:
246
- # Step 1: Reduce music dB while voice is playing
247
  ducked_music = music - duck_level
248
- # Step 2: Overlay voice on top of ducked music
249
  final_audio = ducked_music.overlay(voice)
250
  else:
251
- # No ducking, just overlay
252
  final_audio = music.overlay(voice)
253
 
254
  output_path = os.path.join(tempfile.gettempdir(), "blended_output.wav")
@@ -260,32 +256,73 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
260
 
261
 
262
  # ---------------------------------------------------------------------
263
- # Gradio Interface
264
  # ---------------------------------------------------------------------
265
- with gr.Blocks() as demo:
266
- gr.Markdown("""
267
- # 🎧 AI Promo Studio
268
- Welcome to **AI Promo Studio**, your all-in-one solution for creating professional, engaging audio promos with minimal effort!
269
-
270
- This next-generation platform uses powerful AI models to handle:
271
- - **Script Generation**: Craft concise and impactful copy with LLaMA.
272
- - **Voice Synthesis**: Convert text into natural-sounding voice-overs using Coqui TTS.
273
- - **Music Production**: Generate custom music tracks with MusicGen Large for sound bed.
274
- - **Seamless Blending**: Easily combine voice and music—loop or trim tracks to match your desired promo length, with optional ducking to keep the voice front and center.
275
-
276
- Whether you’re a radio producer, podcaster, or content creator, **AI Promo Studio** streamlines your entire production pipeline—cutting hours of manual editing down to a few clicks.
277
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
 
 
 
 
 
 
 
 
279
 
280
  with gr.Tabs():
281
  # Step 1: Generate Script
282
- with gr.Tab("Step 1: Generate Script"):
283
  with gr.Row():
284
  user_prompt = gr.Textbox(
285
  label="Promo Idea",
286
  placeholder="E.g., A 30-second promo for a morning show...",
287
  lines=2
288
  )
 
289
  llama_model_id = gr.Textbox(
290
  label="LLaMA Model ID",
291
  value="meta-llama/Meta-Llama-3-8B-Instruct",
@@ -298,8 +335,7 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
298
  step=15,
299
  value=30
300
  )
301
-
302
- generate_script_button = gr.Button("Generate Script")
303
  script_output = gr.Textbox(label="Generated Voice-Over Script", lines=5, interactive=False)
304
  sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3, interactive=False)
305
  music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3, interactive=False)
@@ -311,8 +347,8 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
311
  )
312
 
313
  # Step 2: Generate Voice
314
- with gr.Tab("Step 2: Generate Voice"):
315
- gr.Markdown("Generate the voice-over using a Coqui TTS model.")
316
  selected_tts_model = gr.Dropdown(
317
  label="TTS Model",
318
  choices=[
@@ -323,7 +359,7 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
323
  value="tts_models/en/ljspeech/tacotron2-DDC",
324
  multiselect=False
325
  )
326
- generate_voice_button = gr.Button("Generate Voice-Over")
327
  voice_audio_output = gr.Audio(label="Voice-Over (WAV)", type="filepath")
328
 
329
  generate_voice_button.click(
@@ -332,18 +368,18 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
332
  outputs=voice_audio_output,
333
  )
334
 
335
- # Step 3: Generate Music (MusicGen Large)
336
- with gr.Tab("Step 3: Generate Music"):
337
- gr.Markdown("Generate a music track with the **MusicGen Large** model.")
338
  audio_length = gr.Slider(
339
  label="Music Length (tokens)",
340
  minimum=128,
341
  maximum=1024,
342
  step=64,
343
  value=512,
344
- info="Increase tokens for longer audio, but be mindful of inference time."
345
  )
346
- generate_music_button = gr.Button("Generate Music")
347
  music_output = gr.Audio(label="Generated Music (WAV)", type="filepath")
348
 
349
  generate_music_button.click(
@@ -352,9 +388,9 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
352
  outputs=[music_output],
353
  )
354
 
355
- # Step 4: Blend Audio (Loop/Trim + Ducking)
356
- with gr.Tab("Step 4: Blend Audio"):
357
- gr.Markdown("**Music** will be looped or trimmed to match **Voice** duration, then optionally ducked.")
358
  ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
359
  duck_level_slider = gr.Slider(
360
  label="Ducking Level (dB attenuation)",
@@ -363,7 +399,7 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
363
  step=1,
364
  value=10
365
  )
366
- blend_button = gr.Button("Blend Voice + Music")
367
  blended_output = gr.Audio(label="Final Blended Output (WAV)", type="filepath")
368
 
369
  blend_button.click(
@@ -374,17 +410,21 @@ Whether you’re a radio producer, podcaster, or content creator, **AI Promo Stu
374
 
375
  # Footer
376
  gr.Markdown("""
377
- <hr>
378
- <p style="text-align: center; font-size: 0.9em;">
379
- Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
380
- </p>
 
 
381
  """)
382
 
383
  # Visitor Badge
384
  gr.HTML("""
385
- <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">
386
- <img src="https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold&countColor=%23263759" />
387
- </a>
 
 
388
  """)
389
 
390
  demo.launch(debug=True)
 
 
1
  import os
2
  import torch
3
+ import tempfile
4
+ from scipy.io.wavfile import write
5
+ from pydub import AudioSegment
6
+ from dotenv import load_dotenv
7
+ import spaces
8
+ import gradio as gr
9
+
10
+ # Transformers & Models
11
  from transformers import (
12
  AutoTokenizer,
13
  AutoModelForCausalLM,
 
15
  AutoProcessor,
16
  MusicgenForConditionalGeneration,
17
  )
 
 
 
 
 
 
18
  # Coqui TTS
19
  from TTS.api import TTS
20
 
 
100
  f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
101
  "1. A concise voice-over script. Prefix this section with 'Voice-Over Script:'.\n"
102
  "2. Suggestions for sound design. Prefix this section with 'Sound Design Suggestions:'.\n"
103
+ "3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
104
  )
105
  combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"
106
 
 
199
  audio_data = outputs[0, 0].cpu().numpy()
200
  normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
201
 
202
+ output_path = os.path.join(tempfile.gettempdir(), "musicgen_large_generated_music.wav")
203
  write(output_path, 44100, normalized_audio)
204
 
205
  return output_path
 
230
  voice_len = len(voice) # in milliseconds
231
  music_len = len(music) # in milliseconds
232
 
233
+ # Loop music if it's shorter than voice
234
  if music_len < voice_len:
235
  looped_music = AudioSegment.empty()
 
236
  while len(looped_music) < voice_len:
237
  looped_music += music
238
  music = looped_music
239
 
240
+ # Trim music if it's longer than voice
241
  if len(music) > voice_len:
242
  music = music[:voice_len]
243
 
 
244
  if ducking:
 
245
  ducked_music = music - duck_level
 
246
  final_audio = ducked_music.overlay(voice)
247
  else:
 
248
  final_audio = music.overlay(voice)
249
 
250
  output_path = os.path.join(tempfile.gettempdir(), "blended_output.wav")
 
256
 
257
 
258
  # ---------------------------------------------------------------------
259
+ # Gradio Interface with Enhanced UI
260
  # ---------------------------------------------------------------------
261
+ with gr.Blocks(css="""
262
+ /* Global Styles */
263
+ body {
264
+ background: linear-gradient(135deg, #1d1f21, #3a3d41);
265
+ color: #f0f0f0;
266
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
267
+ }
268
+ .header {
269
+ text-align: center;
270
+ padding: 2rem 1rem;
271
+ background: linear-gradient(90deg, #6a11cb, #2575fc);
272
+ border-radius: 0 0 20px 20px;
273
+ margin-bottom: 2rem;
274
+ }
275
+ .header h1 {
276
+ margin: 0;
277
+ font-size: 2.5rem;
278
+ }
279
+ .header p {
280
+ font-size: 1.2rem;
281
+ }
282
+ .gradio-container {
283
+ background: #2e2e2e;
284
+ border-radius: 10px;
285
+ padding: 1rem;
286
+ }
287
+ .tab-title {
288
+ font-size: 1.1rem;
289
+ font-weight: bold;
290
+ }
291
+ .footer {
292
+ text-align: center;
293
+ font-size: 0.9em;
294
+ margin-top: 2rem;
295
+ padding: 1rem;
296
+ color: #cccccc;
297
+ }
298
+ """) as demo:
299
+
300
+ # Custom Header
301
+ with gr.Row(elem_classes="header"):
302
+ gr.Markdown("""
303
+ <h1>🎧 AI Promo Studio</h1>
304
+ <p>Your all-in-one AI solution for crafting engaging audio promos.</p>
305
+ """)
306
 
307
+ gr.Markdown("""
308
+ Welcome to **AI Promo Studio**! This platform leverages state-of-the-art AI models to help you generate:
309
+
310
+ - **Script**: Generate a compelling voice-over script with LLaMA.
311
+ - **Voice Synthesis**: Create natural-sounding voice-overs using Coqui TTS.
312
+ - **Music Production**: Produce custom music tracks with MusicGen.
313
+ - **Audio Blending**: Seamlessly blend voice and music with options for ducking.
314
+ """)
315
 
316
  with gr.Tabs():
317
  # Step 1: Generate Script
318
+ with gr.Tab("📝 Script Generation"):
319
  with gr.Row():
320
  user_prompt = gr.Textbox(
321
  label="Promo Idea",
322
  placeholder="E.g., A 30-second promo for a morning show...",
323
  lines=2
324
  )
325
+ with gr.Row():
326
  llama_model_id = gr.Textbox(
327
  label="LLaMA Model ID",
328
  value="meta-llama/Meta-Llama-3-8B-Instruct",
 
335
  step=15,
336
  value=30
337
  )
338
+ generate_script_button = gr.Button("Generate Script", variant="primary")
 
339
  script_output = gr.Textbox(label="Generated Voice-Over Script", lines=5, interactive=False)
340
  sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3, interactive=False)
341
  music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3, interactive=False)
 
347
  )
348
 
349
  # Step 2: Generate Voice
350
+ with gr.Tab("🎤 Voice Synthesis"):
351
+ gr.Markdown("Generate a natural-sounding voice-over using Coqui TTS.")
352
  selected_tts_model = gr.Dropdown(
353
  label="TTS Model",
354
  choices=[
 
359
  value="tts_models/en/ljspeech/tacotron2-DDC",
360
  multiselect=False
361
  )
362
+ generate_voice_button = gr.Button("Generate Voice-Over", variant="primary")
363
  voice_audio_output = gr.Audio(label="Voice-Over (WAV)", type="filepath")
364
 
365
  generate_voice_button.click(
 
368
  outputs=voice_audio_output,
369
  )
370
 
371
+ # Step 3: Generate Music
372
+ with gr.Tab("🎶 Music Production"):
373
+ gr.Markdown("Generate a custom music track using the **MusicGen Large** model.")
374
  audio_length = gr.Slider(
375
  label="Music Length (tokens)",
376
  minimum=128,
377
  maximum=1024,
378
  step=64,
379
  value=512,
380
+ info="Increase tokens for longer audio (inference time may vary)."
381
  )
382
+ generate_music_button = gr.Button("Generate Music", variant="primary")
383
  music_output = gr.Audio(label="Generated Music (WAV)", type="filepath")
384
 
385
  generate_music_button.click(
 
388
  outputs=[music_output],
389
  )
390
 
391
+ # Step 4: Blend Audio
392
+ with gr.Tab("🎚️ Audio Blending"):
393
+ gr.Markdown("Blend your voice-over and music track. Music will be looped/truncated to match the voice duration. Enable ducking to lower the music during voice segments.")
394
  ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
395
  duck_level_slider = gr.Slider(
396
  label="Ducking Level (dB attenuation)",
 
399
  step=1,
400
  value=10
401
  )
402
+ blend_button = gr.Button("Blend Voice + Music", variant="primary")
403
  blended_output = gr.Audio(label="Final Blended Output (WAV)", type="filepath")
404
 
405
  blend_button.click(
 
410
 
411
  # Footer
412
  gr.Markdown("""
413
+ <div class="footer">
414
+ <hr>
415
+ Created with ❤️ by <a href="https://bilsimaging.com" target="_blank" style="color: #88aaff;">bilsimaging.com</a>
416
+ <br>
417
+ <small>AI Promo Studio &copy; 2025</small>
418
+ </div>
419
  """)
420
 
421
  # Visitor Badge
422
  gr.HTML("""
423
+ <div style="text-align: center; margin-top: 1rem;">
424
+ <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">
425
+ <img src="https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold&countColor=%23263759" alt="visitor badge"/>
426
+ </a>
427
+ </div>
428
  """)
429
 
430
  demo.launch(debug=True)