Bils committed on
Commit
d9bf0f0
·
verified ·
1 Parent(s): 43b4c58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -94
app.py CHANGED
@@ -9,9 +9,9 @@ from transformers import (
9
  MusicgenForConditionalGeneration,
10
  )
11
  from scipy.io.wavfile import write
12
- from TTS.api import TTS
13
- import tempfile
14
  from dotenv import load_dotenv
 
15
  import spaces
16
 
17
  # Load environment variables
@@ -19,10 +19,10 @@ load_dotenv()
19
  hf_token = os.getenv("HF_TOKEN")
20
 
21
  # ---------------------------------------------------------------------
22
- # Load Llama 3 Pipeline with Zero GPU (Encapsulated)
23
  # ---------------------------------------------------------------------
24
  @spaces.GPU(duration=300)
25
- def generate_script(user_prompt: str, duration: int, model_id: str, token: str):
26
  try:
27
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
28
  model = AutoModelForCausalLM.from_pretrained(
@@ -36,7 +36,7 @@ def generate_script(user_prompt: str, duration: int, model_id: str, token: str):
36
 
37
  system_prompt = (
38
  "You are an expert radio imaging producer specializing in sound design and music. "
39
- f"Generate a concise, creative promo script for a {duration}-second ad, focusing on auditory elements and musical appeal."
40
  )
41
 
42
  combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script:"
@@ -46,7 +46,7 @@ def generate_script(user_prompt: str, duration: int, model_id: str, token: str):
46
  return f"Error generating script: {e}"
47
 
48
  # ---------------------------------------------------------------------
49
- # Load MusicGen Model (Encapsulated)
50
  # ---------------------------------------------------------------------
51
  @spaces.GPU(duration=300)
52
  def generate_audio(prompt: str, audio_length: int):
@@ -62,128 +62,116 @@ def generate_audio(prompt: str, audio_length: int):
62
 
63
  audio_data = outputs[0, 0].cpu().numpy()
64
  normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
65
-
66
- output_path = f"{tempfile.gettempdir()}/generated_audio.wav"
67
  write(output_path, musicgen_model.config.audio_encoder.sampling_rate, normalized_audio)
 
68
  return output_path
69
  except Exception as e:
70
  return f"Error generating audio: {e}"
71
 
72
  # ---------------------------------------------------------------------
73
- # Generate Voice-Over with Coqui XTTS-v2
74
  # ---------------------------------------------------------------------
75
  @spaces.GPU(duration=300)
76
- def generate_voice(script: str, reference_audio: str, language: str):
77
  try:
78
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=torch.cuda.is_available())
79
- output_path = f"{tempfile.gettempdir()}/voice_over.wav"
80
- tts.tts_to_file(
81
- text=script,
82
- file_path=output_path,
83
- speaker_wav=reference_audio,
84
- language=language,
85
- )
86
- return output_path
87
  except Exception as e:
88
  return f"Error generating voice-over: {e}"
89
 
90
  # ---------------------------------------------------------------------
91
- # Interface Functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  # ---------------------------------------------------------------------
93
- def interface_generate_script(user_prompt, duration, llama_model_id):
94
- return generate_script(user_prompt, duration, llama_model_id, hf_token)
95
 
96
  def interface_generate_audio(script, audio_length):
97
  return generate_audio(script, audio_length)
98
 
99
- def interface_generate_voice(script, reference_audio, language):
100
- return generate_voice(script, reference_audio, language)
 
 
 
 
101
 
102
  # ---------------------------------------------------------------------
103
  # Interface
104
  # ---------------------------------------------------------------------
105
  with gr.Blocks() as demo:
106
- gr.Markdown("""
107
- # 🎧 All-in-One Radio Promo Studio 🚀
108
- ### Create professional scripts, soundscapes, and voice-overs in minutes!
109
- 🔥 Powered by **Llama 3**, **MusicGen**, and **XTTS-v2**
110
- """)
 
 
111
 
112
- # Script Generation Section
113
  gr.Markdown("## ✍️ Step 1: Generate Your Promo Script")
114
  with gr.Row():
115
- user_prompt = gr.Textbox(
116
- label="🎤 Enter Promo Idea",
117
- placeholder="E.g., A 15-second energetic jingle for a morning talk show.",
118
- lines=2
119
- )
120
- duration = gr.Dropdown(
121
- label="⏳ Duration",
122
- choices=["15", "30", "60"],
123
- value="15",
124
- info="Choose the duration of the promo (in seconds)."
125
- )
126
- llama_model_id = gr.Textbox(
127
- label="🎛️ Llama 3 Model ID",
128
- value="meta-llama/Meta-Llama-3-8B-Instruct"
129
- )
130
- generate_script_button = gr.Button("Generate Script ✨")
131
- script_output = gr.Textbox(label="🖌️ Generated Promo Script", lines=4, interactive=False)
132
 
133
- # Audio Generation Section
134
- gr.Markdown("## 🎵 Step 2: Generate Background Music")
135
  with gr.Row():
136
- audio_length = gr.Slider(
137
- label="🎶 Audio Length (tokens)",
138
- minimum=128,
139
- maximum=1024,
140
- step=64,
141
- value=512
142
- )
143
- generate_audio_button = gr.Button("Generate Audio 🎶")
144
- audio_output = gr.Audio(label="🎵 Generated Audio", type="filepath")
145
 
146
- # Voice-Over Section
147
- gr.Markdown("## 🎙️ Step 3: Generate Voice-Over")
148
  with gr.Row():
149
- reference_audio = gr.Audio(
150
- label="🎤 Upload Reference Voice (6 seconds)",
151
- type="filepath"
152
- )
153
- language = gr.Dropdown(
154
- label="🌍 Language",
155
- choices=["en", "es", "fr", "de", "it"],
156
- value="en"
157
- )
158
- generate_voice_button = gr.Button("Generate Voice-Over 🎤")
159
- voice_output = gr.Audio(label="🔊 Generated Voice-Over", type="filepath")
160
 
161
- # Footer
162
- gr.Markdown("""
163
- <br><hr>
164
- <p style="text-align: center; font-size: 0.9em;">
165
- Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
166
- </p>
167
- """)
168
 
169
  # Button Actions
170
- generate_script_button.click(
171
- fn=interface_generate_script,
172
- inputs=[user_prompt, duration, llama_model_id],
173
- outputs=script_output
174
- )
175
- generate_audio_button.click(
176
- fn=interface_generate_audio,
177
- inputs=[script_output, audio_length],
178
- outputs=audio_output
179
- )
180
- generate_voice_button.click(
181
- fn=interface_generate_voice,
182
- inputs=[script_output, reference_audio, language],
183
- outputs=voice_output
184
  )
185
 
186
- # ---------------------------------------------------------------------
187
  # Launch App
188
- # ---------------------------------------------------------------------
189
  demo.launch(debug=True)
 
9
  MusicgenForConditionalGeneration,
10
  )
11
  from scipy.io.wavfile import write
12
+ from pydub import AudioSegment
 
13
  from dotenv import load_dotenv
14
+ import tempfile
15
  import spaces
16
 
17
  # Load environment variables
 
19
  hf_token = os.getenv("HF_TOKEN")
20
 
21
  # ---------------------------------------------------------------------
22
+ # Generate Script
23
  # ---------------------------------------------------------------------
24
  @spaces.GPU(duration=300)
25
+ def generate_script(user_prompt: str, model_id: str, token: str):
26
  try:
27
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
28
  model = AutoModelForCausalLM.from_pretrained(
 
36
 
37
  system_prompt = (
38
  "You are an expert radio imaging producer specializing in sound design and music. "
39
+ "Take the user's concept and craft a concise, creative promo script with a strong focus on auditory elements and musical appeal."
40
  )
41
 
42
  combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script:"
 
46
  return f"Error generating script: {e}"
47
 
48
  # ---------------------------------------------------------------------
49
+ # Generate Music
50
  # ---------------------------------------------------------------------
51
  @spaces.GPU(duration=300)
52
  def generate_audio(prompt: str, audio_length: int):
 
62
 
63
  audio_data = outputs[0, 0].cpu().numpy()
64
  normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
65
+ output_path = f"{tempfile.gettempdir()}/generated_music.wav"
 
66
  write(output_path, musicgen_model.config.audio_encoder.sampling_rate, normalized_audio)
67
+
68
  return output_path
69
  except Exception as e:
70
  return f"Error generating audio: {e}"
71
 
72
  # ---------------------------------------------------------------------
73
+ # Generate Voice-Over (TTS)
74
  # ---------------------------------------------------------------------
75
  @spaces.GPU(duration=300)
76
+ def generate_voice(script: str, language: str):
77
  try:
78
+ tts_model = pipeline("text-to-speech", model="coqui/XTTS-v2")
79
+ tts_output = tts_model(script, language=language)
80
+
81
+ voice_path = f"{tempfile.gettempdir()}/generated_voice.wav"
82
+ with open(voice_path, "wb") as f:
83
+ f.write(tts_output["audio"])
84
+
85
+ return voice_path
 
86
  except Exception as e:
87
  return f"Error generating voice-over: {e}"
88
 
89
  # ---------------------------------------------------------------------
90
+ # Mix Audio with Ducking Option
91
+ # ---------------------------------------------------------------------
92
+ def mix_audio(voice_file, music_file, output_file, ducking: bool):
93
+ try:
94
+ voice = AudioSegment.from_file(voice_file)
95
+ music = AudioSegment.from_file(music_file)
96
+
97
+ if ducking:
98
+ music = music - 10 # Lower the volume of the music
99
+ combined = music.overlay(voice, position=0)
100
+
101
+ combined.export(output_file, format="wav")
102
+ return output_file
103
+ except Exception as e:
104
+ return f"Error mixing audio: {e}"
105
+
106
+ # ---------------------------------------------------------------------
107
+ # Gradio Interface Functions
108
  # ---------------------------------------------------------------------
109
+ def interface_generate_script(user_prompt, llama_model_id):
110
+ return generate_script(user_prompt, llama_model_id, hf_token)
111
 
112
  def interface_generate_audio(script, audio_length):
113
  return generate_audio(script, audio_length)
114
 
115
+ def interface_generate_voice(script, language):
116
+ return generate_voice(script, language)
117
+
118
+ def interface_mix_audio(voice_file, music_file, ducking):
119
+ output_file = f"{tempfile.gettempdir()}/final_promo.wav"
120
+ return mix_audio(voice_file, music_file, output_file, ducking)
121
 
122
  # ---------------------------------------------------------------------
123
  # Interface
124
  # ---------------------------------------------------------------------
125
  with gr.Blocks() as demo:
126
+ gr.Markdown(
127
+ """
128
+ # 🎙️ AI Radio Promo Maker 🚀
129
+ ### Your one-stop solution for **scripts**, **voice-overs**, and **music**!
130
+ 🔥 **Zero GPU** integration powered by **Hugging Face** models.
131
+ """
132
+ )
133
 
134
+ # Step 1: Generate Script
135
  gr.Markdown("## ✍️ Step 1: Generate Your Promo Script")
136
  with gr.Row():
137
+ user_prompt = gr.Textbox(label="Enter Promo Idea", placeholder="E.g., A 15-second energetic jingle.", lines=2)
138
+ llama_model_id = gr.Textbox(label="Llama 3 Model ID", value="meta-llama/Meta-Llama-3-8B-Instruct")
139
+ generate_script_button = gr.Button("Generate Script")
140
+ script_output = gr.Textbox(label="Generated Script", lines=4, interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
+ # Step 2: Generate Voice-Over
143
+ gr.Markdown("## 🎤 Step 2: Generate Voice-Over")
144
  with gr.Row():
145
+ language = gr.Dropdown(label="Select Language", choices=["en", "es", "fr", "de"], value="en")
146
+ generate_voice_button = gr.Button("Generate Voice")
147
+ voice_output = gr.Audio(label="Generated Voice", type="filepath", interactive=False)
 
 
 
 
 
 
148
 
149
+ # Step 3: Generate Music
150
+ gr.Markdown("## 🎵 Step 3: Generate Background Music")
151
  with gr.Row():
152
+ audio_length = gr.Slider(label="Audio Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
153
+ generate_audio_button = gr.Button("Generate Music")
154
+ audio_output = gr.Audio(label="Generated Music", type="filepath", interactive=False)
 
 
 
 
 
 
 
 
155
 
156
+ # Step 4: Mix Audio
157
+ gr.Markdown("## 🎶 Step 4: Mix Audio")
158
+ with gr.Row():
159
+ ducking = gr.Checkbox(label="Enable Ducking (lower background music volume)", value=True)
160
+ mix_audio_button = gr.Button("Mix Audio")
161
+ final_output = gr.Audio(label="Final Promo Audio", type="filepath", interactive=False)
 
162
 
163
  # Button Actions
164
+ generate_script_button.click(interface_generate_script, inputs=[user_prompt, llama_model_id], outputs=script_output)
165
+ generate_voice_button.click(interface_generate_voice, inputs=[script_output, language], outputs=voice_output)
166
+ generate_audio_button.click(interface_generate_audio, inputs=[script_output, audio_length], outputs=audio_output)
167
+ mix_audio_button.click(interface_mix_audio, inputs=[voice_output, audio_output, ducking], outputs=final_output)
168
+
169
+ gr.Markdown(
170
+ """
171
+ <hr>
172
+ <p style="text-align: center;">Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a></p>
173
+ """
 
 
 
 
174
  )
175
 
 
176
  # Launch App
 
177
  demo.launch(debug=True)