Bils committed (verified)
Commit cc173f9 · 1 Parent(s): 0198f87

Update app.py

Files changed (1): app.py (+84 −114)
app.py CHANGED
@@ -50,6 +50,7 @@ def get_llama_pipeline(model_id: str, token: str):
     """
     if model_id in LLAMA_PIPELINES:
         return LLAMA_PIPELINES[model_id]
+
     tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
@@ -62,6 +63,7 @@ def get_llama_pipeline(model_id: str, token: str):
     LLAMA_PIPELINES[model_id] = text_pipeline
     return text_pipeline
 
+
 def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
     """
     Returns a cached MusicGen model if available; otherwise, loads it.
@@ -69,6 +71,7 @@ def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
     """
     if model_key in MUSICGEN_MODELS:
         return MUSICGEN_MODELS[model_key]
+
     model = MusicgenForConditionalGeneration.from_pretrained(model_key)
     processor = AutoProcessor.from_pretrained(model_key)
     device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -76,16 +79,19 @@ def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
     MUSICGEN_MODELS[model_key] = (model, processor)
     return model, processor
 
+
 def get_tts_model(model_name: str = "tts_models/en/ljspeech/tacotron2-DDC"):
     """
     Returns a cached TTS model if available; otherwise, loads it.
     """
     if model_name in TTS_MODELS:
         return TTS_MODELS[model_name]
+
     tts_model = TTS(model_name)
     TTS_MODELS[model_name] = tts_model
     return tts_model
 
+
 # ---------------------------------------------------------------------
 # Script Generation Function
 # ---------------------------------------------------------------------
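
Note: the three loader functions touched above share one memoization pattern: a module-level dict keyed by model name, filled on first use and reused afterwards. A minimal standalone sketch of that pattern (the names _CACHE, load_model and get_model are illustrative, not identifiers from app.py):

_CACHE = {}

def load_model(model_id: str) -> str:
    # Stand-in for an expensive from_pretrained() call.
    return f"loaded:{model_id}"

def get_model(model_id: str) -> str:
    if model_id in _CACHE:          # cache hit: reuse the already-loaded object
        return _CACHE[model_id]
    model = load_model(model_id)    # cache miss: load once and remember it
    _CACHE[model_id] = model
    return model
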
@@ -97,6 +103,7 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
     """
     try:
         text_pipeline = get_llama_pipeline(model_id, token)
+
         system_prompt = (
             "You are an expert radio imaging producer specializing in sound design and music. "
             f"Based on the user's concept and the selected duration of {duration} seconds, produce the following: "
@@ -105,6 +112,7 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
             "3. Music styles or track recommendations. Prefix this section with 'Music Suggestions:'."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nOutput:"
+
         with torch.inference_mode():
             result = text_pipeline(
                 combined_prompt,
@@ -112,14 +120,17 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
                 do_sample=True,
                 temperature=0.8
             )
+
         generated_text = result[0]["generated_text"]
         if "Output:" in generated_text:
             generated_text = generated_text.split("Output:")[-1].strip()
+
         # Default placeholders
         voice_script = "No voice-over script found."
         sound_design = "No sound design suggestions found."
         music_suggestions = "No music suggestions found."
-        # Extract sections if present
+
+        # Voice-Over Script
         if "Voice-Over Script:" in generated_text:
             parts = generated_text.split("Voice-Over Script:")
             voice_script_part = parts[1]
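
The section parsing introduced in this hunk relies on the three markers the system prompt asks the model to emit. A toy, self-contained sketch of the same split-on-marker approach (the sample text is invented for illustration):

text = (
    "Voice-Over Script: Wake up with the city's best morning crew.\n"
    "Sound Design Suggestions: Sunrise pads, light percussion.\n"
    "Music Suggestions: Upbeat acoustic pop."
)
voice = text.split("Voice-Over Script:")[1].split("Sound Design Suggestions:")[0].strip()
sound = text.split("Sound Design Suggestions:")[1].split("Music Suggestions:")[0].strip()
music = text.split("Music Suggestions:")[1].strip()
print(voice, sound, music, sep="\n")
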
@@ -127,6 +138,8 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
                 voice_script = voice_script_part.split("Sound Design Suggestions:")[0].strip()
             else:
                 voice_script = voice_script_part.strip()
+
+        # Sound Design
         if "Sound Design Suggestions:" in generated_text:
             parts = generated_text.split("Sound Design Suggestions:")
             sound_design_part = parts[1]
@@ -134,43 +147,17 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
                 sound_design = sound_design_part.split("Music Suggestions:")[0].strip()
             else:
                 sound_design = sound_design_part.strip()
+
+        # Music Suggestions
         if "Music Suggestions:" in generated_text:
             parts = generated_text.split("Music Suggestions:")
             music_suggestions = parts[1].strip()
+
         return voice_script, sound_design, music_suggestions
+
     except Exception as e:
         return f"Error generating script: {e}", "", ""
 
-# ---------------------------------------------------------------------
-# Ad Promo Idea Generation Function
-# ---------------------------------------------------------------------
-@spaces.GPU(duration=100)
-def generate_ad_promo_idea(user_prompt: str, model_id: str, token: str):
-    """
-    Generates a creative ad promo idea based on the user's concept.
-    Returns a string containing the ad promo idea.
-    """
-    try:
-        text_pipeline = get_llama_pipeline(model_id, token)
-        system_prompt = (
-            "You are a creative advertising strategist. "
-            "Generate a unique and engaging ad promo idea based on the following concept. "
-            "Include creative angles, potential taglines, and media suggestions."
-        )
-        combined_prompt = f"{system_prompt}\nConcept: {user_prompt}\nAd Promo Idea:"
-        with torch.inference_mode():
-            result = text_pipeline(
-                combined_prompt,
-                max_new_tokens=150,
-                do_sample=True,
-                temperature=0.8
-            )
-        generated_text = result[0]["generated_text"]
-        if "Ad Promo Idea:" in generated_text:
-            generated_text = generated_text.split("Ad Promo Idea:")[-1].strip()
-        return generated_text
-    except Exception as e:
-        return f"Error generating ad promo idea: {e}"
 
 # ---------------------------------------------------------------------
 # Voice-Over Generation Function
@@ -184,14 +171,21 @@ def generate_voice(script: str, tts_model_name: str = "tts_models/en/ljspeech/tacotron2-DDC"):
     try:
         if not script.strip():
             return "Error: No script provided."
+
+        # Clean the script to remove special characters (e.g., asterisks) that may produce warnings
         cleaned_script = clean_text(script)
+
         tts_model = get_tts_model(tts_model_name)
+
+        # Generate and save voice
        output_path = os.path.join(tempfile.gettempdir(), "voice_over.wav")
         tts_model.tts_to_file(text=cleaned_script, file_path=output_path)
         return output_path
+
     except Exception as e:
         return f"Error generating voice: {e}"
 
+
 # ---------------------------------------------------------------------
 # Music Generation Function
 # ---------------------------------------------------------------------
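
For reference, generate_voice boils down to one Coqui TTS call. A minimal standalone sketch, assuming the coqui-ai TTS package is installed (the model downloads on first run; the sample text and output file name are illustrative):

import os
import tempfile

from TTS.api import TTS  # Coqui TTS

tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
out_path = os.path.join(tempfile.gettempdir(), "voice_over_demo.wav")  # demo filename, not the app's
tts.tts_to_file(text="Welcome back to the morning show.", file_path=out_path)
print(out_path)
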
@@ -204,23 +198,30 @@ def generate_music(prompt: str, audio_length: int):
     try:
         if not prompt.strip():
             return "Error: No music suggestion provided."
+
         model_key = "facebook/musicgen-large"
         musicgen_model, musicgen_processor = get_musicgen_model(model_key)
+
         device = "cuda" if torch.cuda.is_available() else "cpu"
         # Process the input and move each tensor to the proper device
         inputs = musicgen_processor(text=[prompt], padding=True, return_tensors="pt")
         inputs = {k: v.to(device) for k, v in inputs.items()}
+
         with torch.inference_mode():
             outputs = musicgen_model.generate(**inputs, max_new_tokens=audio_length)
-        # Post-process the output to create a WAV file
+
         audio_data = outputs[0, 0].cpu().numpy()
         normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
+
         output_path = os.path.join(tempfile.gettempdir(), "musicgen_large_generated_music.wav")
         write(output_path, 44100, normalized_audio)
+
         return output_path
+
     except Exception as e:
         return f"Error generating music: {e}"
 
+
 # ---------------------------------------------------------------------
 # Audio Blending with Duration Sync & Ducking
 # ---------------------------------------------------------------------
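
A standalone sketch of the MusicGen call this hunk wraps, assuming transformers, torch and scipy are installed. It uses the smaller facebook/musicgen-small checkpoint and the model's own sampling rate to keep the example light; the app itself uses facebook/musicgen-large and writes at 44100 Hz as shown above:

import torch
from scipy.io.wavfile import write
from transformers import AutoProcessor, MusicgenForConditionalGeneration

model_id = "facebook/musicgen-small"  # app.py uses facebook/musicgen-large
processor = AutoProcessor.from_pretrained(model_id)
model = MusicgenForConditionalGeneration.from_pretrained(model_id)

inputs = processor(text=["upbeat acoustic morning-show bed"], padding=True, return_tensors="pt")
with torch.inference_mode():
    audio = model.generate(**inputs, max_new_tokens=256)  # shape: (batch, channels, samples)

sample_rate = model.config.audio_encoder.sampling_rate
write("musicgen_demo.wav", sample_rate, audio[0, 0].cpu().numpy())
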
@@ -228,33 +229,46 @@ def generate_music(prompt: str, audio_length: int):
 def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int = 10):
     """
     Blends two audio files (voice and music).
+    1. If music < voice, loops the music until it meets/exceeds the voice duration.
+    2. If music > voice, trims music to the voice duration.
+    3. If ducking=True, the music is attenuated by 'duck_level' dB while the voice is playing.
     Returns the file path to the blended .wav file.
     """
     try:
         if not os.path.isfile(voice_path) or not os.path.isfile(music_path):
             return "Error: Missing audio files for blending."
+
         voice = AudioSegment.from_wav(voice_path)
         music = AudioSegment.from_wav(music_path)
-        voice_len = len(voice)
-        music_len = len(music)
+
+        voice_len = len(voice)  # in milliseconds
+        music_len = len(music)  # in milliseconds
+
+        # Loop music if it's shorter than the voice
         if music_len < voice_len:
             looped_music = AudioSegment.empty()
             while len(looped_music) < voice_len:
                 looped_music += music
            music = looped_music
+
+        # Trim music if it's longer than the voice
         if len(music) > voice_len:
             music = music[:voice_len]
+
         if ducking:
             ducked_music = music - duck_level
             final_audio = ducked_music.overlay(voice)
         else:
             final_audio = music.overlay(voice)
+
         output_path = os.path.join(tempfile.gettempdir(), "blended_output.wav")
         final_audio.export(output_path, format="wav")
         return output_path
+
     except Exception as e:
         return f"Error blending audio: {e}"
 
+
 # ---------------------------------------------------------------------
 # Gradio Interface with Enhanced UI
 # ---------------------------------------------------------------------
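
The loop/trim/duck behaviour documented in the new docstring maps directly onto pydub operations. A small standalone sketch, assuming pydub (with ffmpeg) is installed and two local files voice.wav and music.wav exist (both file names are placeholders):

from pydub import AudioSegment

voice = AudioSegment.from_wav("voice.wav")   # placeholder inputs
music = AudioSegment.from_wav("music.wav")

# Loop the music until it covers the voice, then trim to the exact length.
while len(music) < len(voice):               # pydub lengths are in milliseconds
    music += music
music = music[:len(voice)]

# Attenuate the music by 10 dB, overlay the voice on top, and export.
blended = (music - 10).overlay(voice)
blended.export("blended_demo.wav", format="wav")
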
@@ -274,23 +288,19 @@ with gr.Blocks(css="""
     }
     .header h1 {
         margin: 0;
-        font-size: 2.8rem;
+        font-size: 2.5rem;
     }
     .header p {
         font-size: 1.2rem;
     }
-    .instructions {
-        background-color: #2e2e2e;
-        border-radius: 8px;
-        padding: 1rem;
-        margin-bottom: 1rem;
-        font-size: 0.95rem;
-    }
     .gradio-container {
         background: #2e2e2e;
         border-radius: 10px;
         padding: 1rem;
-        margin-bottom: 1rem;
+    }
+    .tab-title {
+        font-size: 1.1rem;
+        font-weight: bold;
     }
     .footer {
         text-align: center;
@@ -299,64 +309,31 @@ with gr.Blocks(css="""
         padding: 1rem;
         color: #cccccc;
     }
-    .btn-clear {
-        margin-left: 1rem;
-        background: #ff5555;
-        color: #fff;
-    }
 """) as demo:
 
     # Custom Header
     with gr.Row(elem_classes="header"):
         gr.Markdown("""
-            <h1>🎧 AI Ads Promo</h1>
-            <p>Your all-in-one AI solution for crafting engaging audio ads. <br><em>Demo MVP</em></p>
+            <h1>🎧 AI Promo Studio</h1>
+            <p>Your all-in-one AI solution for crafting engaging audio promos.</p>
         """)
 
     gr.Markdown("""
-    Welcome to **AI Ads Promo (Demo MVP)**! This platform leverages state-of-the-art AI models to help you generate creative advertising content.
-    Use the tabs below to generate:
-    - **Ad Promo Ideas**
-    - **Voice-Over Scripts**
-    - **Natural-Sounding Voice-Overs**
-    - **Custom Music Tracks**
-    - **Blended Audio Ads**
+    Welcome to **AI Promo Studio**! This platform leverages state-of-the-art AI models to help you generate:
+
+    - **Script**: Generate a compelling voice-over script with LLaMA.
+    - **Voice Synthesis**: Create natural-sounding voice-overs using Coqui TTS.
+    - **Music Production**: Produce custom music tracks with MusicGen.
+    - **Audio Blending**: Seamlessly blend voice and music with options for ducking.
     """)
 
     with gr.Tabs():
-        # Tab 1: Ad Promo Idea Generation
-        with gr.Tab("💡 Ad Promo Idea"):
-            gr.Markdown("Enter a concept for your ad and let the system generate a creative ad promo idea with taglines and media suggestions.")
-            with gr.Row():
-                ad_concept = gr.Textbox(
-                    label="Ad Concept",
-                    placeholder="E.g., A vibrant summer sale for a trendy clothing brand...",
-                    lines=2
-                )
-            with gr.Row():
-                llama_model_id_idea = gr.Textbox(
-                    label="LLaMA Model ID",
-                    value="meta-llama/Meta-Llama-3-8B-Instruct",
-                    placeholder="Enter a valid Hugging Face model ID"
-                )
-            with gr.Row():
-                generate_ad_idea_button = gr.Button("Generate Ad Promo Idea", variant="primary")
-                clear_ad_idea = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
-            ad_idea_output = gr.Textbox(label="Generated Ad Promo Idea", lines=5, interactive=False)
-            generate_ad_idea_button.click(
-                fn=lambda concept, model_id: generate_ad_promo_idea(concept, model_id, HF_TOKEN),
-                inputs=[ad_concept, llama_model_id_idea],
-                outputs=ad_idea_output
-            )
-            clear_ad_idea.click(fn=lambda: "", inputs=None, outputs=ad_idea_output)
-
-        # Tab 2: Script Generation
+        # Step 1: Generate Script
         with gr.Tab("📝 Script Generation"):
-            gr.Markdown("Generate a voice-over script along with sound design and music suggestions based on your promo idea.")
             with gr.Row():
                 user_prompt = gr.Textbox(
                     label="Promo Idea",
-                    placeholder="E.g., A 30-second energetic promo for a new product launch...",
+                    placeholder="E.g., A 30-second promo for a morning show...",
                     lines=2
                 )
             with gr.Row():
@@ -372,22 +349,20 @@ with gr.Blocks(css="""
                     step=15,
                     value=30
                 )
-            with gr.Row():
-                generate_script_button = gr.Button("Generate Script", variant="primary")
-                clear_script = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
-            script_output = gr.Textbox(label="Voice-Over Script", lines=5, interactive=False)
+            generate_script_button = gr.Button("Generate Script", variant="primary")
+            script_output = gr.Textbox(label="Generated Voice-Over Script", lines=5, interactive=False)
             sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3, interactive=False)
             music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3, interactive=False)
+
             generate_script_button.click(
                 fn=lambda user_prompt, model_id, dur: generate_script(user_prompt, model_id, HF_TOKEN, dur),
                 inputs=[user_prompt, llama_model_id, duration],
-                outputs=[script_output, sound_design_output, music_suggestion_output]
+                outputs=[script_output, sound_design_output, music_suggestion_output],
             )
-            clear_script.click(fn=lambda: ["", "", ""], inputs=None, outputs=[script_output, sound_design_output, music_suggestion_output])
 
-        # Tab 3: Voice Synthesis
+        # Step 2: Generate Voice
         with gr.Tab("🎤 Voice Synthesis"):
-            gr.Markdown("Convert your generated script into a natural-sounding voice-over using Coqui TTS.")
+            gr.Markdown("Generate a natural-sounding voice-over using Coqui TTS.")
             selected_tts_model = gr.Dropdown(
                 label="TTS Model",
                 choices=[
@@ -398,19 +373,18 @@ with gr.Blocks(css="""
                 value="tts_models/en/ljspeech/tacotron2-DDC",
                 multiselect=False
             )
-            with gr.Row():
-                generate_voice_button = gr.Button("Generate Voice-Over", variant="primary")
-                clear_voice = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
+            generate_voice_button = gr.Button("Generate Voice-Over", variant="primary")
             voice_audio_output = gr.Audio(label="Voice-Over (WAV)", type="filepath")
+
             generate_voice_button.click(
                 fn=lambda script, tts_model: generate_voice(script, tts_model),
-                inputs=script_output, outputs=voice_audio_output
+                inputs=[script_output, selected_tts_model],
+                outputs=voice_audio_output,
             )
-            clear_voice.click(fn=lambda: "", inputs=None, outputs=voice_audio_output)
 
-        # Tab 4: Music Production
+        # Step 3: Generate Music
         with gr.Tab("🎶 Music Production"):
-            gr.Markdown("Generate a custom music track based on the suggestions using the MusicGen model.")
+            gr.Markdown("Generate a custom music track using the **MusicGen Large** model.")
             audio_length = gr.Slider(
                 label="Music Length (tokens)",
                 minimum=128,
@@ -419,20 +393,18 @@ with gr.Blocks(css="""
                 value=512,
                 info="Increase tokens for longer audio (inference time may vary)."
             )
-            with gr.Row():
-                generate_music_button = gr.Button("Generate Music", variant="primary")
-                clear_music = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
+            generate_music_button = gr.Button("Generate Music", variant="primary")
             music_output = gr.Audio(label="Generated Music (WAV)", type="filepath")
+
             generate_music_button.click(
                 fn=lambda music_suggestion, length: generate_music(music_suggestion, length),
                 inputs=[music_suggestion_output, audio_length],
-                outputs=[music_output]
+                outputs=[music_output],
             )
-            clear_music.click(fn=lambda: "", inputs=None, outputs=music_output)
 
-        # Tab 5: Audio Blending
+        # Step 4: Blend Audio
         with gr.Tab("🎚️ Audio Blending"):
-            gr.Markdown("Blend your voice-over and music track. Music will be adjusted to match the voice duration with an option to enable ducking.")
+            gr.Markdown("Blend your voice-over and music track. Music will be looped/truncated to match the voice duration. Enable ducking to lower the music during voice segments.")
             ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
             duck_level_slider = gr.Slider(
                 label="Ducking Level (dB attenuation)",
@@ -441,16 +413,14 @@ with gr.Blocks(css="""
                 step=1,
                 value=10
             )
-            with gr.Row():
-                blend_button = gr.Button("Blend Voice + Music", variant="primary")
-                clear_blend = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
+            blend_button = gr.Button("Blend Voice + Music", variant="primary")
             blended_output = gr.Audio(label="Final Blended Output (WAV)", type="filepath")
+
             blend_button.click(
                 fn=blend_audio,
                 inputs=[voice_audio_output, music_output, ducking_checkbox, duck_level_slider],
                 outputs=blended_output
             )
-            clear_blend.click(fn=lambda: "", inputs=None, outputs=blended_output)
 
     # Footer
     gr.Markdown("""
@@ -458,10 +428,11 @@ with gr.Blocks(css="""
     <hr>
     Created with ❤️ by <a href="https://bilsimaging.com" target="_blank" style="color: #88aaff;">bilsimaging.com</a>
    <br>
-    <small>AI Ads Promo (Demo MVP) &copy; 2025</small>
+    <small>AI Promo Studio &copy; 2025</small>
     </div>
     """)
 
+    # Visitor Badge
     gr.HTML("""
     <div style="text-align: center; margin-top: 1rem;">
     <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">
@@ -471,4 +442,3 @@ with gr.Blocks(css="""
     """)
 
 demo.launch(debug=True)
-
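
The UI changes above repeatedly use the same Gradio wiring: a Button.click that maps input components to output components. A minimal self-contained sketch of that pattern, independent of app.py (the function body and labels are illustrative stand-ins):

import gradio as gr

def make_script(idea: str, seconds: int):
    # Stand-in for generate_script(): returns the three text outputs the UI expects.
    return f"Voice-over for: {idea}", f"Sound design notes for {seconds}s", "Music: upbeat pop"

with gr.Blocks() as demo:
    idea = gr.Textbox(label="Promo Idea")
    seconds = gr.Slider(15, 60, step=15, value=30, label="Duration (seconds)")
    go = gr.Button("Generate", variant="primary")
    script = gr.Textbox(label="Voice-Over Script")
    sound = gr.Textbox(label="Sound Design Suggestions")
    music = gr.Textbox(label="Music Suggestions")
    go.click(fn=make_script, inputs=[idea, seconds], outputs=[script, sound, music])

if __name__ == "__main__":
    demo.launch()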