Bils committed on
Commit
b950350
·
verified ·
1 Parent(s): d9bf0f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -97
app.py CHANGED
@@ -10,8 +10,9 @@ from transformers import (
10
  )
11
  from scipy.io.wavfile import write
12
  from pydub import AudioSegment
13
- from dotenv import load_dotenv
14
  import tempfile
 
15
  import spaces
16
 
17
  # Load environment variables
@@ -19,10 +20,10 @@ load_dotenv()
19
  hf_token = os.getenv("HF_TOKEN")
20
 
21
  # ---------------------------------------------------------------------
22
- # Generate Script
23
  # ---------------------------------------------------------------------
24
  @spaces.GPU(duration=300)
25
- def generate_script(user_prompt: str, model_id: str, token: str):
26
  try:
27
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
28
  model = AutoModelForCausalLM.from_pretrained(
@@ -35,21 +36,45 @@ def generate_script(user_prompt: str, model_id: str, token: str):
35
  llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
36
 
37
  system_prompt = (
38
- "You are an expert radio imaging producer specializing in sound design and music. "
39
- "Take the user's concept and craft a concise, creative promo script with a strong focus on auditory elements and musical appeal."
 
40
  )
41
 
42
- combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script:"
43
  result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
44
- return result[0]["generated_text"].split("Refined script:")[-1].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  except Exception as e:
46
- return f"Error generating script: {e}"
47
 
48
  # ---------------------------------------------------------------------
49
- # Generate Music
50
  # ---------------------------------------------------------------------
51
  @spaces.GPU(duration=300)
52
- def generate_audio(prompt: str, audio_length: int):
53
  try:
54
  musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
55
  musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
@@ -62,116 +87,81 @@ def generate_audio(prompt: str, audio_length: int):
62
 
63
  audio_data = outputs[0, 0].cpu().numpy()
64
  normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
 
65
  output_path = f"{tempfile.gettempdir()}/generated_music.wav"
66
- write(output_path, musicgen_model.config.audio_encoder.sampling_rate, normalized_audio)
67
 
68
  return output_path
69
  except Exception as e:
70
- return f"Error generating audio: {e}"
71
 
72
  # ---------------------------------------------------------------------
73
- # Generate Voice-Over (TTS)
74
  # ---------------------------------------------------------------------
75
- @spaces.GPU(duration=300)
76
- def generate_voice(script: str, language: str):
77
  try:
78
- tts_model = pipeline("text-to-speech", model="coqui/XTTS-v2")
79
- tts_output = tts_model(script, language=language)
80
-
81
- voice_path = f"{tempfile.gettempdir()}/generated_voice.wav"
82
- with open(voice_path, "wb") as f:
83
- f.write(tts_output["audio"])
84
-
85
- return voice_path
86
- except Exception as e:
87
- return f"Error generating voice-over: {e}"
88
-
89
- # ---------------------------------------------------------------------
90
- # Mix Audio with Ducking Option
91
- # ---------------------------------------------------------------------
92
- def mix_audio(voice_file, music_file, output_file, ducking: bool):
93
- try:
94
- voice = AudioSegment.from_file(voice_file)
95
- music = AudioSegment.from_file(music_file)
96
 
97
  if ducking:
98
- music = music - 10 # Lower the volume of the music
99
- combined = music.overlay(voice, position=0)
100
 
101
- combined.export(output_file, format="wav")
102
- return output_file
 
 
 
103
  except Exception as e:
104
- return f"Error mixing audio: {e}"
105
 
106
  # ---------------------------------------------------------------------
107
- # Gradio Interface Functions
108
  # ---------------------------------------------------------------------
109
- def interface_generate_script(user_prompt, llama_model_id):
110
- return generate_script(user_prompt, llama_model_id, hf_token)
 
 
111
 
112
- def interface_generate_audio(script, audio_length):
113
- return generate_audio(script, audio_length)
 
114
 
115
- def interface_generate_voice(script, language):
116
- return generate_voice(script, language)
 
117
 
118
- def interface_mix_audio(voice_file, music_file, ducking):
119
- output_file = f"{tempfile.gettempdir()}/final_promo.wav"
120
- return mix_audio(voice_file, music_file, output_file, ducking)
121
 
122
- # ---------------------------------------------------------------------
123
- # Interface
124
- # ---------------------------------------------------------------------
125
  with gr.Blocks() as demo:
126
- gr.Markdown(
127
- """
128
- # 🎙️ AI Radio Promo Maker 🚀
129
- ### Your one-stop solution for **scripts**, **voice-overs**, and **music**!
130
- 🔥 **Zero GPU** integration powered by **Hugging Face** models.
131
- """
132
- )
133
-
134
- # Step 1: Generate Script
135
- gr.Markdown("## ✍️ Step 1: Generate Your Promo Script")
136
- with gr.Row():
137
- user_prompt = gr.Textbox(label="Enter Promo Idea", placeholder="E.g., A 15-second energetic jingle.", lines=2)
138
- llama_model_id = gr.Textbox(label="Llama 3 Model ID", value="meta-llama/Meta-Llama-3-8B-Instruct")
139
- generate_script_button = gr.Button("Generate Script")
140
- script_output = gr.Textbox(label="Generated Script", lines=4, interactive=False)
141
-
142
- # Step 2: Generate Voice-Over
143
- gr.Markdown("## 🎤 Step 2: Generate Voice-Over")
144
- with gr.Row():
145
- language = gr.Dropdown(label="Select Language", choices=["en", "es", "fr", "de"], value="en")
146
- generate_voice_button = gr.Button("Generate Voice")
147
- voice_output = gr.Audio(label="Generated Voice", type="filepath", interactive=False)
148
 
149
- # Step 3: Generate Music
150
- gr.Markdown("## 🎵 Step 3: Generate Background Music")
151
  with gr.Row():
152
- audio_length = gr.Slider(label="Audio Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
153
- generate_audio_button = gr.Button("Generate Music")
154
- audio_output = gr.Audio(label="Generated Music", type="filepath", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- # Step 4: Mix Audio
157
- gr.Markdown("## 🎶 Step 4: Mix Audio")
158
- with gr.Row():
159
- ducking = gr.Checkbox(label="Enable Ducking (lower background music volume)", value=True)
160
- mix_audio_button = gr.Button("Mix Audio")
161
- final_output = gr.Audio(label="Final Promo Audio", type="filepath", interactive=False)
162
-
163
- # Button Actions
164
- generate_script_button.click(interface_generate_script, inputs=[user_prompt, llama_model_id], outputs=script_output)
165
- generate_voice_button.click(interface_generate_voice, inputs=[script_output, language], outputs=voice_output)
166
- generate_audio_button.click(interface_generate_audio, inputs=[script_output, audio_length], outputs=audio_output)
167
- mix_audio_button.click(interface_mix_audio, inputs=[voice_output, audio_output, ducking], outputs=final_output)
168
-
169
- gr.Markdown(
170
- """
171
  <hr>
172
- <p style="text-align: center;">Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a></p>
173
- """
174
- )
 
175
 
176
- # Launch App
177
  demo.launch(debug=True)
 
10
  )
11
  from scipy.io.wavfile import write
12
  from pydub import AudioSegment
13
+ from pydub.playback import play
14
  import tempfile
15
+ from dotenv import load_dotenv
16
  import spaces
17
 
18
  # Load environment variables
 
20
  hf_token = os.getenv("HF_TOKEN")
21
 
22
  # ---------------------------------------------------------------------
23
+ # Script Generation Function
24
  # ---------------------------------------------------------------------
25
  @spaces.GPU(duration=300)
26
+ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
27
  try:
28
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
29
  model = AutoModelForCausalLM.from_pretrained(
 
36
  llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
37
 
38
  system_prompt = (
39
+ f"You are an expert radio imaging producer specializing in sound design and music. "
40
+ f"Based on the user's concept and the selected duration of {duration} seconds, craft a concise, engaging promo script. "
41
+ f"Ensure the script fits within the time limit and suggest a matching music style that complements the theme."
42
  )
43
 
44
+ combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script and music suggestion:"
45
  result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
46
+
47
+ generated_text = result[0]["generated_text"].split("Refined script and music suggestion:")[-1].strip()
48
+ script, music_suggestion = generated_text.split("Music Suggestion:")
49
+ return script.strip(), music_suggestion.strip()
50
+ except Exception as e:
51
+ return f"Error generating script: {e}", None
52
+
53
+ # ---------------------------------------------------------------------
54
+ # Voice-Over Generation Function
55
+ # ---------------------------------------------------------------------
56
@spaces.GPU(duration=300)
def generate_voice(script: str, speaker: str):
    """Synthesize a voice-over for *script* and return the WAV file path.

    Parameters
    ----------
    script : str
        The promo text to speak.
    speaker : str
        Free-form voice-style hint from the UI. Currently not consumed by
        the underlying model call; kept so the Gradio wiring and callers
        stay unchanged.

    Returns
    -------
    str
        Path to the generated WAV file on success, or an
        ``"Error generating voice-over: ..."`` message string on failure
        (the file-wide error convention — callers check ``"Error" in result``).
    """
    try:
        # BUG FIX: the previous code loaded "coqui/XTTS-v2" through
        # AutoModelForCausalLM/AutoProcessor and wrote model.generate()
        # output straight to disk. generate() on a causal-LM head yields
        # token ids, not audio samples, so the resulting WAV was noise
        # (and the 22050 Hz rate was a guess). Use the dedicated
        # text-to-speech pipeline instead, which returns a waveform plus
        # its true sampling rate. `pipeline` is imported at module top.
        tts = pipeline("text-to-speech", model="coqui/XTTS-v2")
        result = tts(script)

        output_path = f"{tempfile.gettempdir()}/generated_voice.wav"
        # Write the waveform at the sampling rate the model reports rather
        # than a hard-coded constant.
        write(output_path, result["sampling_rate"], result["audio"])
        return output_path
    except Exception as e:
        return f"Error generating voice-over: {e}"
72
 
73
  # ---------------------------------------------------------------------
74
+ # Music Generation Function
75
  # ---------------------------------------------------------------------
76
  @spaces.GPU(duration=300)
77
+ def generate_music(prompt: str, audio_length: int):
78
  try:
79
  musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
80
  musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
 
87
 
88
  audio_data = outputs[0, 0].cpu().numpy()
89
  normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
90
+
91
  output_path = f"{tempfile.gettempdir()}/generated_music.wav"
92
+ write(output_path, 44100, normalized_audio)
93
 
94
  return output_path
95
  except Exception as e:
96
+ return f"Error generating music: {e}"
97
 
98
  # ---------------------------------------------------------------------
99
+ # Audio Blending Function with Ducking
100
  # ---------------------------------------------------------------------
101
def blend_audio(voice_path: str, music_path: str, ducking: bool):
    """Overlay the voice-over on top of the music bed and export one WAV.

    Parameters
    ----------
    voice_path : str
        Path to the voice-over audio file.
    music_path : str
        Path to the background-music audio file.
    ducking : bool
        When True, attenuate the music bed by 10 dB so the voice sits
        clearly on top.

    Returns
    -------
    str
        Path to the exported mix on success, or an
        ``"Error blending audio: ..."`` message string on failure.
    """
    try:
        voice_track = AudioSegment.from_file(voice_path)
        music_track = AudioSegment.from_file(music_path)

        # Ducking: pydub's `-` operator applies a dB reduction.
        music_track = music_track - 10 if ducking else music_track

        mixdown = music_track.overlay(voice_track)

        final_path = f"{tempfile.gettempdir()}/final_promo.wav"
        mixdown.export(final_path, format="wav")
        return final_path
    except Exception as e:
        return f"Error blending audio: {e}"
116
 
117
  # ---------------------------------------------------------------------
118
+ # Gradio Interface
119
  # ---------------------------------------------------------------------
120
def process_all(user_prompt, llama_model_id, duration, audio_length, speaker, ducking):
    """Run the full promo pipeline and return ``(status_text, audio_path)``.

    Pipeline stages: LLM script generation -> TTS voice-over -> MusicGen
    bed -> ducked mixdown.  Each stage signals failure by returning a
    string containing ``"Error"``; the first failing stage's message is
    returned with ``None`` audio so it surfaces in the Gradio textbox.
    """
    def failed(result):
        # File-wide convention: helpers report failure as "Error ..." strings.
        return "Error" in result

    script, music_suggestion = generate_script(
        user_prompt, llama_model_id, hf_token, duration
    )
    if failed(script):
        return script, None

    voice_path = generate_voice(script, speaker)
    if failed(voice_path):
        return voice_path, None

    music_path = generate_music(music_suggestion, audio_length)
    if failed(music_path):
        return music_path, None

    return (
        f"Script:\n{script}\n\nMusic Suggestion:\n{music_suggestion}",
        blend_audio(voice_path, music_path, ducking),
    )
 
135
 
 
 
 
136
# ---------------------------------------------------------------------
# Gradio UI: one-click flow that wires every input into process_all.
# ---------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🎧 AI Promo Studio with Script, Voice, Music, and Mixing 🚀
    Generate fully mixed promos effortlessly with AI-driven tools for radio and media!
    """)

    # All pipeline controls live in a single row.
    with gr.Row():
        user_prompt = gr.Textbox(
            label="Promo Idea",
            placeholder="E.g., A 30-second promo for a morning show.",
        )
        llama_model_id = gr.Textbox(
            label="Llama Model ID",
            value="meta-llama/Meta-Llama-3-8B-Instruct",
        )
        duration = gr.Slider(
            label="Duration (seconds)", minimum=15, maximum=60, step=15, value=30
        )
        audio_length = gr.Slider(
            label="Music Length (tokens)", minimum=128, maximum=1024, step=64, value=512
        )
        speaker = gr.Textbox(
            label="Voice Style (optional)",
            placeholder="E.g., male, female, or neutral.",
        )
        ducking = gr.Checkbox(label="Enable Ducking", value=True)

    generate_button = gr.Button("Generate Full Promo")
    script_output = gr.Textbox(label="Generated Script and Music Suggestion")
    audio_output = gr.Audio(label="Final Promo Audio", type="filepath")

    # One button drives the whole script -> voice -> music -> mix pipeline.
    generate_button.click(
        fn=process_all,
        inputs=[user_prompt, llama_model_id, duration, audio_length, speaker, ducking],
        outputs=[script_output, audio_output],
    )

    gr.Markdown("""
    <hr>
    <p style="text-align: center; font-size: 0.9em;">
    Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
    </p>
    """)

# Launch with debug logging so Space errors show in the console.
demo.launch(debug=True)