Bils committed on
Commit
db46bfb
·
verified ·
1 Parent(s): 2f5a95d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +210 -70
app.py CHANGED
@@ -1,110 +1,250 @@
1
  import streamlit as st
2
- from transformers import AutoProcessor, MusicgenForConditionalGeneration
3
- import scipy.io.wavfile
4
- import openai
5
  import torch
 
 
 
 
 
 
 
 
6
 
7
-
8
- # Streamlit app setup
 
9
  st.set_page_config(
10
- page_icon="https://soundboard.bilsimaging.com/faviconbilsimaging.png",
11
  layout="wide",
12
- page_title="Radio Imaging Audio Generator Beta 0.1",
13
  initial_sidebar_state="expanded",
14
  )
15
 
16
- # App Header
17
- st.markdown("""
18
- <h1 style=''>Radio Imaging Audio Generator
19
- <span style='font-size: 24px; color: #FDC74A;'>Beta 0.1</span></h1>
20
- """, unsafe_allow_html=True)
21
- st.write("Welcome to the Radio Imaging & MusicGen AI audio generator. Easily create unique audio for your radio imaging projects or for music creation using cutting-edge AI technology.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  st.markdown("---")
23
 
24
- # Instructions Section
 
 
25
  with st.expander("📘 How to Use This Web App"):
26
- st.markdown("""
27
- 1. **Enter OpenAI API Key**: Provide your API key in the sidebar to access the GPT model.
28
- 2. **Select GPT Model**: Choose the desired model, such as `gpt-3.5-turbo-16k`.
29
- 3. **Write a Description**: Provide a detailed description of your desired audio.
30
- 4. **Generate and Review the Prompt**: Generate a description and review the output.
31
- 5. **Generate Audio**: Use the description to create your audio file.
32
- 6. **Playback and Download**: Listen to or download the generated audio.
33
- """)
 
 
 
 
 
34
 
35
- # Sidebar Inputs
 
 
36
  with st.sidebar:
37
- openai_api_key = st.text_input("🔑 OpenAI API Key", type="password", help="Enter your OpenAI API key.")
38
- st.caption("Need an API key? Get one [here](https://platform.openai.com/account/api-keys).")
39
- model = st.selectbox("🛠 Choose GPT Model", options=("gpt-3.5-turbo", "gpt-3.5-turbo-16k"))
 
 
 
 
 
 
 
 
 
40
 
 
41
  # Prompt Input
42
- st.markdown("## ✍🏻 Write Your Description")
 
43
  prompt = st.text_area(
44
- "Describe the audio you'd like to generate.",
45
- help="Include details like mood, instruments, style, or purpose (e.g., calm background music for a morning show)."
46
  )
47
 
48
- # Generate Prompt
49
- if st.button("📄 Generate Prompt"):
50
- if not openai_api_key.strip() or not prompt.strip():
51
- st.error("Please provide both an OpenAI API key and a description.")
52
- else:
53
- with st.spinner("Generating your prompt... Please wait."):
54
- try:
55
- # Create a prompt and get response from OpenAI
56
- full_prompt = {"role": "user", "content": f"Describe a radio imaging audio piece based on: {prompt}"}
57
- response = openai.ChatCompletion.create(model=model, messages=[full_prompt], api_key=openai_api_key)
58
- descriptive_text = response.choices[0].message['content'].strip()
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- # Append a credit line
61
- descriptive_text += "\n\n© Created using Radio Imaging Audio Generator by Bilsimaging"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- # Save to session state
64
- st.session_state['generated_prompt'] = descriptive_text
65
- st.success("Prompt successfully generated!")
66
- st.write(descriptive_text)
67
- st.download_button("📥 Download Prompt", descriptive_text, file_name="generated_prompt.txt")
 
 
 
 
 
 
 
 
 
 
 
 
68
  except Exception as e:
69
- st.error(f"Error while generating prompt: {e}")
70
 
71
  st.markdown("---")
72
 
73
- # Cache Model Loading
 
 
74
  @st.cache_resource
75
- def load_model():
76
  """Load and cache the MusicGen model and processor."""
77
- model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
78
- processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
79
- return model, processor
80
 
81
- # Generate Audio
82
- if st.button("▶ Generate Audio"):
83
- if 'generated_prompt' not in st.session_state or not st.session_state['generated_prompt']:
84
- st.error("Please generate and approve a prompt before creating audio.")
85
  else:
86
- descriptive_text = st.session_state['generated_prompt']
87
- with st.spinner("Generating your audio... This might take a few moments."):
88
  try:
89
- # Load model and processor
90
- musicgen_model, processor = load_model()
91
-
92
- # Generate audio from the prompt
93
- inputs = processor(text=[descriptive_text], padding=True, return_tensors="pt")
 
 
94
  audio_values = musicgen_model.generate(**inputs, max_new_tokens=512)
95
  sampling_rate = musicgen_model.config.audio_encoder.sampling_rate
96
 
97
- # Save and display the audio
98
- audio_filename = "Bilsimaging_radio_imaging_output.wav"
99
- scipy.io.wavfile.write(audio_filename, rate=sampling_rate, data=audio_values[0, 0].numpy())
 
 
 
 
100
  st.success("Audio successfully generated!")
101
  st.audio(audio_filename)
102
  except Exception as e:
103
  st.error(f"Error while generating audio: {e}")
104
 
 
105
  # Footer Section
 
106
  st.markdown("---")
107
- st.markdown("""
108
- ✔️ Made with ❤️ by [Bilsimaging](https://bilsimaging.com). Your feedback and support help us grow!
109
- """)
 
 
 
 
 
 
110
  st.markdown("<style>#MainMenu {visibility: hidden;} footer {visibility: hidden;}</style>", unsafe_allow_html=True)
 
1
  import streamlit as st
 
 
 
2
  import torch
3
+ import scipy.io.wavfile
4
+ from transformers import (
5
+ AutoTokenizer,
6
+ AutoModelForCausalLM,
7
+ pipeline,
8
+ AutoProcessor,
9
+ MusicgenForConditionalGeneration
10
+ )
11
 
12
+ # ---------------------------------------------------------------------
13
+ # Page Configuration
14
+ # ---------------------------------------------------------------------
15
  st.set_page_config(
16
+ page_icon="🎧",
17
  layout="wide",
18
+ page_title="Radio Imaging Audio Generator - Llama & MusicGen",
19
  initial_sidebar_state="expanded",
20
  )
21
 
22
+ # ---------------------------------------------------------------------
23
+ # Custom CSS for a Vibrant UI
24
+ # ---------------------------------------------------------------------
25
+ CUSTOM_CSS = """
26
+ <style>
27
+ body {
28
+ background-color: #F8FBFE;
29
+ color: #1F2937;
30
+ font-family: 'Segoe UI', Tahoma, sans-serif;
31
+ }
32
+ h1, h2, h3, h4, h5, h6 {
33
+ color: #3B82F6;
34
+ }
35
+ .stButton>button {
36
+ background-color: #3B82F6 !important;
37
+ color: #FFFFFF !important;
38
+ border-radius: 8px !important;
39
+ font-size: 16px !important;
40
+ }
41
+ .sidebar .sidebar-content {
42
+ background: #E0F2FE;
43
+ }
44
+ .material-card {
45
+ border: 1px solid #D1D5DB;
46
+ border-radius: 8px;
47
+ padding: 1rem;
48
+ margin-bottom: 1rem;
49
+ background-color: #ffffff;
50
+ }
51
+ .footer-note {
52
+ text-align: center;
53
+ opacity: 0.6;
54
+ font-size: 14px;
55
+ margin-top: 30px;
56
+ }
57
+ </style>
58
+ """
59
+ st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
60
+
61
+ # ---------------------------------------------------------------------
62
+ # Header Section
63
+ # ---------------------------------------------------------------------
64
+ st.markdown(
65
+ """
66
+ <h1>Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta)</span></h1>
67
+ <p style='font-size:18px;'>
68
+ Generate custom radio imaging audio, ads, and promo tracks with Llama & MusicGen!
69
+ </p>
70
+ """,
71
+ unsafe_allow_html=True
72
+ )
73
  st.markdown("---")
74
 
75
+ # ---------------------------------------------------------------------
76
+ # Instructions Section in an Expander
77
+ # ---------------------------------------------------------------------
78
  with st.expander("📘 How to Use This Web App"):
79
+ st.markdown(
80
+ """
81
+ 1. **Enter your prompt**: Describe the type of audio you need (e.g., an energetic 15-second jingle for a pop radio promo).
82
+ 2. **Generate Description**: Let Llama 2 (or another open-source model) refine your prompt into a creative script.
83
+ 3. **Generate Audio**: Pass that script to MusicGen to get a custom audio file.
84
+ 4. **Playback & Download**: Listen to your new track and download it for further editing.
85
+
86
+ **Tips**:
87
+ - Keep descriptions short & specific for best results.
88
+ - If the Llama model is too large, switch to a smaller open-source model or try a GPU-based environment.
89
+ - If you see errors about model permissions, ensure you’ve accepted the license on Hugging Face.
90
+ """
91
+ )
92
 
93
+ # ---------------------------------------------------------------------
94
+ # Sidebar: Model Selection & Options
95
+ # ---------------------------------------------------------------------
96
  with st.sidebar:
97
+ st.header("🔧 Model Config")
98
+ # Llama 2 chat model from Hugging Face
99
+ llama_model_id = st.text_input(
100
+ "Llama 2 Model ID on Hugging Face",
101
+ value="meta-llama/Llama-2-7b-chat-hf",
102
+ help="For example: meta-llama/Llama-2-7b-chat-hf (requires license acceptance)."
103
+ )
104
+ device_option = st.selectbox(
105
+ "Hardware Device",
106
+ ["auto", "cpu"],
107
+ help="If running locally with a GPU, choose 'auto'. If you only have a CPU, pick 'cpu'."
108
+ )
109
 
110
+ # ---------------------------------------------------------------------
111
  # Prompt Input
112
+ # ---------------------------------------------------------------------
113
+ st.markdown("## ✍🏻 Write Your Brief / Concept")
114
  prompt = st.text_area(
115
+ "Describe the radio imaging or jingle you want to create. Include style, mood, duration, etc.",
116
+ placeholder="e.g. 'An energetic 15-second pop jingle for a morning radio show, upbeat and fun...'"
117
  )
118
 
119
+ # ---------------------------------------------------------------------
120
+ # Text Generation with Llama
121
+ # ---------------------------------------------------------------------
122
@st.cache_resource
def load_llama_pipeline(model_id: str, device: str):
    """
    Load a causal language model and wrap it in a text-generation pipeline.

    The result is cached by Streamlit (``st.cache_resource``), so the weights
    are loaded once per distinct ``(model_id, device)`` pair.
    The user must have accepted the license for gated models like Llama 2.

    Args:
        model_id: Hugging Face Hub identifier of the model to load,
            e.g. ``"meta-llama/Llama-2-7b-chat-hf"``.
        device: Value forwarded as ``device_map`` (the sidebar offers
            ``"auto"`` and ``"cpu"``).

    Returns:
        A ``transformers`` text-generation pipeline built from the loaded
        model and tokenizer.
    """
    # "auto" is the sidebar default, so it is also selected on CPU-only hosts.
    # Only use half precision when a CUDA device is really present; otherwise
    # fall back to float32, which every CPU kernel supports.
    # NOTE(review): non-CUDA accelerators (e.g. MPS) also get float32 here.
    use_half_precision = device == "auto" and torch.cuda.is_available()
    weights_dtype = torch.float16 if use_half_precision else torch.float32

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=weights_dtype,
        device_map=device,
    )
    gen_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map=device,
    )
    return gen_pipeline
141
 
142
def generate_description(user_prompt: str, pipeline_gen):
    """
    Use the pipeline to create a refined description for MusicGen.

    Args:
        user_prompt: The short concept typed by the user.
        pipeline_gen: Callable invoked as
            ``pipeline_gen(prompt, max_new_tokens=..., do_sample=..., temperature=...)``
            that returns a list whose first item is a dict with a
            ``"generated_text"`` entry (the shape read below).

    Returns:
        The model's completion with the echoed prompt removed, surrounding
        whitespace trimmed, and a credit line appended.
    """
    system_prompt = (
        "You are a helpful assistant specialized in creative advertising scripts and radio imaging. "
        "Refine the user's short concept into a more detailed, creative script. "
        "Keep it concise, but highlight any relevant tone, instruments, or style to guide music generation."
    )
    answer_marker = "Your refined script:"

    # Single combined prompt: instructions, the user's request, then the marker
    # after which the model is expected to write its answer.
    combined_prompt = f"{system_prompt}\nUser request: {user_prompt}\n{answer_marker}"

    result = pipeline_gen(
        combined_prompt,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
    )
    generated_text = result[0]["generated_text"]

    # Some models return the prompt followed by the completion. Remove exactly
    # what was sent rather than splitting on the bare word "script:", which
    # would also cut a completion that happens to contain that word.
    if generated_text.startswith(combined_prompt):
        generated_text = generated_text[len(combined_prompt):]
    elif answer_marker in generated_text:
        # Prompt came back altered; fall back to our own full marker.
        generated_text = generated_text.rpartition(answer_marker)[2]
    generated_text = generated_text.strip()

    # Sign-off / credit line shown with the description.
    generated_text += "\n\n(Generated by Radio Imaging Audio Generator - Llama Edition)"
    return generated_text
175
 
176
+ # Button: Generate Description
177
if st.button("📄 Refine Description with Llama"):
    if prompt.strip():
        with st.spinner("Generating a refined description..."):
            try:
                # Load (or fetch from cache) the pipeline and refine the brief.
                refined_text = generate_description(
                    prompt,
                    load_llama_pipeline(llama_model_id, device_option),
                )
                # Keep the result in session state for the audio step.
                st.session_state['refined_prompt'] = refined_text
                st.success("Description successfully refined!")
                st.write(refined_text)
                st.download_button(
                    label="📥 Download Description",
                    data=refined_text,
                    file_name="refined_description.txt",
                )
            except Exception as exc:
                # Surface any loading/generation failure in the UI.
                st.error(f"Error while generating with Llama: {exc}")
    else:
        # Nothing (or only whitespace) was typed into the brief.
        st.error("Please provide a brief concept before generating a description.")
195
 
196
  st.markdown("---")
197
 
198
+ # ---------------------------------------------------------------------
199
+ # MusicGen: Generate Audio
200
+ # ---------------------------------------------------------------------
201
  @st.cache_resource
202
def load_musicgen_model():
    """Return the MusicGen model and its processor as a ``(model, processor)`` pair.

    Both are loaded from the ``facebook/musicgen-small`` checkpoint.
    """
    checkpoint = "facebook/musicgen-small"
    return (
        MusicgenForConditionalGeneration.from_pretrained(checkpoint),
        AutoProcessor.from_pretrained(checkpoint),
    )
207
 
208
if st.button("▶ Generate Audio with MusicGen"):
    # Local import keeps this handler self-contained: the WAV is built in
    # memory instead of a fixed file on disk.
    import io

    # One falsy check covers both a missing key and an empty description.
    descriptive_text = st.session_state.get('refined_prompt')
    if not descriptive_text:
        st.error("Please generate or have a refined description first.")
    else:
        with st.spinner("Generating your audio... This can take a moment."):
            try:
                musicgen_model, processor = load_musicgen_model()

                # The refined description ends with a "(Generated by ...)"
                # credit line meant for display; drop it so it is not used as
                # musical guidance. Fall back to the full text if nothing
                # remains after removing it.
                music_prompt = descriptive_text.split(
                    "\n\n(Generated by Radio Imaging Audio Generator", 1
                )[0].strip() or descriptive_text

                inputs = processor(
                    text=[music_prompt],
                    padding=True,
                    return_tensors="pt",
                )
                audio_values = musicgen_model.generate(**inputs, max_new_tokens=512)
                sampling_rate = musicgen_model.config.audio_encoder.sampling_rate

                # Write the WAV to an in-memory buffer: a fixed filename would
                # be shared by every session on the server, so concurrent
                # users could overwrite or hear each other's output.
                wav_buffer = io.BytesIO()
                scipy.io.wavfile.write(
                    wav_buffer,
                    rate=sampling_rate,
                    data=audio_values[0, 0].cpu().numpy(),
                )
                st.success("Audio successfully generated!")
                st.audio(wav_buffer.getvalue(), format="audio/wav")
            except Exception as e:
                st.error(f"Error while generating audio: {e}")
236
 
237
+ # ---------------------------------------------------------------------
238
  # Footer Section
239
+ # ---------------------------------------------------------------------
240
  st.markdown("---")
241
+ st.markdown(
242
+ "<div class='footer-note'>"
243
+ "✅ Built with Llama 2 & MusicGen · "
244
+ "Created for radio imaging producers · "
245
+ "Feedback welcome at <a href='https://bilsimaging.com' target='_blank'>Bilsimaging</a>!"
246
+ "</div>",
247
+ unsafe_allow_html=True
248
+ )
249
+ # Hide Streamlit's default menu and footer if you wish
250
  st.markdown("<style>#MainMenu {visibility: hidden;} footer {visibility: hidden;}</style>", unsafe_allow_html=True)