Spaces:

Bils
/

AIPromoStudio

Running on Zero

App Files Community

Bils committed on Jan 9

Commit

db46bfb

verified ·

1 Parent(s): 2f5a95d

Update app.py

Browse files

Files changed (1) hide show

app.py +210 -70

app.py CHANGED Viewed

@@ -1,110 +1,250 @@
 import streamlit as st
-from transformers import AutoProcessor, MusicgenForConditionalGeneration
-import scipy.io.wavfile
-import openai
 import torch
-# Streamlit app setup
 st.set_page_config(
-    page_icon="https://soundboard.bilsimaging.com/faviconbilsimaging.png",
     layout="wide",
-    page_title="Radio Imaging Audio Generator Beta 0.1",
     initial_sidebar_state="expanded",
 )
-# App Header
-st.markdown("""
-    <h1 style=''>Radio Imaging Audio Generator
-    <span style='font-size: 24px; color: #FDC74A;'>Beta 0.1</span></h1>
-    """, unsafe_allow_html=True)
-st.write("Welcome to the Radio Imaging & MusicGen AI audio generator. Easily create unique audio for your radio imaging projects or for music creation using cutting-edge AI technology.")
 st.markdown("---")
-# Instructions Section
 with st.expander("📘 How to Use This Web App"):
-    st.markdown("""
-        1. **Enter OpenAI API Key**: Provide your API key in the sidebar to access the GPT model.
-        2. **Select GPT Model**: Choose the desired model, such as `gpt-3.5-turbo-16k`.
-        3. **Write a Description**: Provide a detailed description of your desired audio.
-        4. **Generate and Review the Prompt**: Generate a description and review the output.
-        5. **Generate Audio**: Use the description to create your audio file.
-        6. **Playback and Download**: Listen to or download the generated audio.
-    """)
-# Sidebar Inputs
 with st.sidebar:
-    openai_api_key = st.text_input("🔑 OpenAI API Key", type="password", help="Enter your OpenAI API key.")
-    st.caption("Need an API key? Get one [here](https://platform.openai.com/account/api-keys).")
-    model = st.selectbox("🛠 Choose GPT Model", options=("gpt-3.5-turbo", "gpt-3.5-turbo-16k"))
 # Prompt Input
-st.markdown("## ✍🏻 Write Your Description")
 prompt = st.text_area(
-    "Describe the audio you'd like to generate.",
-    help="Include details like mood, instruments, style, or purpose (e.g., calm background music for a morning show)."
 )
-# Generate Prompt
-if st.button("📄 Generate Prompt"):
-    if not openai_api_key.strip() or not prompt.strip():
-        st.error("Please provide both an OpenAI API key and a description.")
-    else:
-        with st.spinner("Generating your prompt... Please wait."):
-            try:
-                # Create a prompt and get response from OpenAI
-                full_prompt = {"role": "user", "content": f"Describe a radio imaging audio piece based on: {prompt}"}
-                response = openai.ChatCompletion.create(model=model, messages=[full_prompt], api_key=openai_api_key)
-                descriptive_text = response.choices[0].message['content'].strip()
-                # Append a credit line
-                descriptive_text += "\n\n© Created using Radio Imaging Audio Generator by Bilsimaging"
-                # Save to session state
-                st.session_state['generated_prompt'] = descriptive_text
-                st.success("Prompt successfully generated!")
-                st.write(descriptive_text)
-                st.download_button("📥 Download Prompt", descriptive_text, file_name="generated_prompt.txt")
             except Exception as e:
-                st.error(f"Error while generating prompt: {e}")
 st.markdown("---")
-# Cache Model Loading
 @st.cache_resource
-def load_model():
     """Load and cache the MusicGen model and processor."""
-    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
-    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
-    return model, processor
-# Generate Audio
-if st.button("▶ Generate Audio"):
-    if 'generated_prompt' not in st.session_state or not st.session_state['generated_prompt']:
-        st.error("Please generate and approve a prompt before creating audio.")
     else:
-        descriptive_text = st.session_state['generated_prompt']
-        with st.spinner("Generating your audio... This might take a few moments."):
             try:
-                # Load model and processor
-                musicgen_model, processor = load_model()
-                # Generate audio from the prompt
-                inputs = processor(text=[descriptive_text], padding=True, return_tensors="pt")
                 audio_values = musicgen_model.generate(**inputs, max_new_tokens=512)
                 sampling_rate = musicgen_model.config.audio_encoder.sampling_rate
-                # Save and display the audio
-                audio_filename = "Bilsimaging_radio_imaging_output.wav"
-                scipy.io.wavfile.write(audio_filename, rate=sampling_rate, data=audio_values[0, 0].numpy())
                 st.success("Audio successfully generated!")
                 st.audio(audio_filename)
             except Exception as e:
                 st.error(f"Error while generating audio: {e}")
 # Footer Section
 st.markdown("---")
-st.markdown("""
-    ✔️ Made with ❤️ by [Bilsimaging](https://bilsimaging.com). Your feedback and support help us grow!
-    """)
 st.markdown("<style>#MainMenu {visibility: hidden;} footer {visibility: hidden;}</style>", unsafe_allow_html=True)

 import streamlit as st
 import torch
+import scipy.io.wavfile
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    pipeline,
+    AutoProcessor,
+    MusicgenForConditionalGeneration
+)
+# ---------------------------------------------------------------------
+# Page Configuration
+# ---------------------------------------------------------------------
 st.set_page_config(
+    page_icon="🎧",
     layout="wide",
+    page_title="Radio Imaging Audio Generator - Llama & MusicGen",
     initial_sidebar_state="expanded",
 )
+# ---------------------------------------------------------------------
+# Custom CSS for a Vibrant UI
+# ---------------------------------------------------------------------
+CUSTOM_CSS = """
+<style>
+body {
+    background-color: #F8FBFE;
+    color: #1F2937;
+    font-family: 'Segoe UI', Tahoma, sans-serif;
+}
+h1, h2, h3, h4, h5, h6 {
+    color: #3B82F6;
+}
+.stButton>button {
+    background-color: #3B82F6 !important;
+    color: #FFFFFF !important;
+    border-radius: 8px !important;
+    font-size: 16px !important;
+}
+.sidebar .sidebar-content {
+    background: #E0F2FE;
+}
+.material-card {
+    border: 1px solid #D1D5DB;
+    border-radius: 8px;
+    padding: 1rem;
+    margin-bottom: 1rem;
+    background-color: #ffffff;
+}
+.footer-note {
+    text-align: center;
+    opacity: 0.6;
+    font-size: 14px;
+    margin-top: 30px;
+}
+</style>
+"""
+st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
+# ---------------------------------------------------------------------
+# Header Section
+# ---------------------------------------------------------------------
+st.markdown(
+    """
+    <h1>Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta)</span></h1>
+    <p style='font-size:18px;'>
+        Generate custom radio imaging audio, ads, and promo tracks with Llama & MusicGen!
+    </p>
+    """,
+    unsafe_allow_html=True
+)
 st.markdown("---")
+# ---------------------------------------------------------------------
+# Instructions Section in an Expander
+# ---------------------------------------------------------------------
 with st.expander("📘 How to Use This Web App"):
+    st.markdown(
+        """
+        1. **Enter your prompt**: Describe the type of audio you need (e.g., an energetic 15-second jingle for a pop radio promo).
+        2. **Generate Description**: Let Llama 2 (or another open-source model) refine your prompt into a creative script.
+        3. **Generate Audio**: Pass that script to MusicGen to get a custom audio file.
+        4. **Playback & Download**: Listen to your new track and download it for further editing.
+        **Tips**:
+        - Keep descriptions short & specific for best results.
+        - If the Llama model is too large, switch to a smaller open-source model or try a GPU-based environment.
+        - If you see errors about model permissions, ensure you’ve accepted the license on Hugging Face.
+        """
+    )
+# ---------------------------------------------------------------------
+# Sidebar: Model Selection & Options
+# ---------------------------------------------------------------------
 with st.sidebar:
+    st.header("🔧 Model Config")
+    # Llama 2 chat model from Hugging Face
+    llama_model_id = st.text_input(
+        "Llama 2 Model ID on Hugging Face",
+        value="meta-llama/Llama-2-7b-chat-hf",
+        help="For example: meta-llama/Llama-2-7b-chat-hf (requires license acceptance)."
+    )
+    device_option = st.selectbox(
+        "Hardware Device",
+        ["auto", "cpu"],
+        help="If running locally with a GPU, choose 'auto'. If you only have a CPU, pick 'cpu'."
+    )
+# ---------------------------------------------------------------------
 # Prompt Input
+# ---------------------------------------------------------------------
+st.markdown("## ✍🏻 Write Your Brief / Concept")
 prompt = st.text_area(
+    "Describe the radio imaging or jingle you want to create. Include style, mood, duration, etc.",
+    placeholder="e.g. 'An energetic 15-second pop jingle for a morning radio show, upbeat and fun...'"
 )
+# ---------------------------------------------------------------------
+# Text Generation with Llama
+# ---------------------------------------------------------------------
+@st.cache_resource
+def load_llama_pipeline(model_id: str, device: str):
+    """
+    Load the Llama or other open-source model as a text-generation pipeline.
+    The user must have accepted the license for certain models like Llama 2.
+
+    Args:
+        model_id: Hugging Face model identifier passed to ``from_pretrained``.
+        device: Value forwarded as ``device_map`` (the sidebar offers
+            "auto" or "cpu").
+
+    Returns:
+        A ``transformers`` "text-generation" pipeline wrapping the loaded
+        model and tokenizer.
+    """
+    # Half precision is only worthwhile on a GPU. "auto" on a CPU-only host
+    # still ends up on the CPU, so fall back to float32 there instead of
+    # loading fp16 weights that CPU kernels handle poorly.
+    use_half_precision = device == "auto" and torch.cuda.is_available()
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16 if use_half_precision else torch.float32,
+        device_map=device
+    )
+    gen_pipeline = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        device_map=device
+    )
+    return gen_pipeline
+def generate_description(user_prompt: str, pipeline_gen):
+    """
+    Use the pipeline to create a refined description for MusicGen.
+
+    Args:
+        user_prompt: The user's short brief / concept.
+        pipeline_gen: Callable text-generation pipeline. It is called with
+            the prompt string plus sampling keyword arguments and must return
+            a list whose first item is a dict with a "generated_text" key.
+
+    Returns:
+        The generated script with the echoed prompt removed and surrounding
+        whitespace stripped, followed by a credit line.
+    """
+    # Instruction format for Llama 2 chat
+    # or simpler prompt if it's not a chat model
+    system_prompt = (
+        "You are a helpful assistant specialized in creative advertising scripts and radio imaging. "
+        "Refine the user's short concept into a more detailed, creative script. "
+        "Keep it concise, but highlight any relevant tone, instruments, or style to guide music generation."
+    )
+    # Marker that closes the prompt; whatever follows it is the model's answer.
+    answer_marker = "Your refined script:"
+    # We'll feed a combined prompt
+    combined_prompt = f"{system_prompt}\nUser request: {user_prompt}\n{answer_marker}"
+    # Generate text
+    result = pipeline_gen(
+        combined_prompt,
+        max_new_tokens=200,
+        do_sample=True,
+        temperature=0.7
+    )
+    # Extract generated text (some models output extra tokens or the entire prompt again)
+    generated_text = result[0]["generated_text"]
+    # Remove the echoed prompt. Prefer an exact prefix match so that a
+    # "script:" appearing inside the model's own answer is never used as a
+    # cut point; otherwise cut once at the first occurrence of the marker.
+    if generated_text.startswith(combined_prompt):
+        generated_text = generated_text[len(combined_prompt):]
+    elif answer_marker in generated_text:
+        generated_text = generated_text.split(answer_marker, 1)[1]
+    generated_text = generated_text.strip()
+    # Optional: add a sign-off or credit line
+    generated_text += "\n\n(Generated by Radio Imaging Audio Generator - Llama Edition)"
+    return generated_text
+# Button: Generate Description
+# On click: validate the brief, run it through the Llama pipeline, and keep
+# the result in st.session_state['refined_prompt'] so it is still available
+# on the rerun triggered by the audio button.
+if st.button("📄 Refine Description with Llama"):
+    if not prompt.strip():
+        st.error("Please provide a brief concept before generating a description.")
+    else:
+        with st.spinner("Generating a refined description..."):
+            try:
+                # Model ID and device come from the sidebar inputs.
+                pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
+                refined_text = generate_description(prompt, pipeline_llama)
+                st.session_state['refined_prompt'] = refined_text
+                st.success("Description successfully refined!")
+                st.write(refined_text)
+                st.download_button(
+                    "📥 Download Description",
+                    refined_text,
+                    file_name="refined_description.txt"
+                )
             except Exception as e:
+                # Surface any load/generation failure in the UI.
+                st.error(f"Error while generating with Llama: {e}")
 st.markdown("---")
+# ---------------------------------------------------------------------
+# MusicGen: Generate Audio
+# ---------------------------------------------------------------------
 @st.cache_resource
+def load_musicgen_model():
     """Load and cache the MusicGen model and processor."""
+    # Model and processor are both loaded from the same
+    # "facebook/musicgen-small" checkpoint and returned as a
+    # (model, processor) tuple.
+    mg_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+    mg_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+    return mg_model, mg_processor
+# Button: Generate Audio
+# Requires a non-empty st.session_state['refined_prompt'].
+# NOTE(review): the stored text includes the credit line appended by
+# generate_description, so that line is also fed to MusicGen - confirm
+# this is intended.
+if st.button("▶ Generate Audio with MusicGen"):
+    if 'refined_prompt' not in st.session_state or not st.session_state['refined_prompt']:
+        st.error("Please generate or have a refined description first.")
     else:
+        descriptive_text = st.session_state['refined_prompt']
+        with st.spinner("Generating your audio... This can take a moment."):
             try:
+                musicgen_model, processor = load_musicgen_model()
+                # Use the refined prompt as input
+                inputs = processor(
+                    text=[descriptive_text],
+                    padding=True,
+                    return_tensors="pt"
+                )
                 audio_values = musicgen_model.generate(**inputs, max_new_tokens=512)
                 sampling_rate = musicgen_model.config.audio_encoder.sampling_rate
+                # Save & display the audio
+                # NOTE(review): fixed filename in the working directory; if
+                # several sessions run in the same process they would
+                # overwrite each other's output - verify.
+                audio_filename = "radio_imaging_output.wav"
+                # audio_values[0, 0] selects one waveform; presumably the
+                # layout is (batch, channels, samples) - TODO confirm.
+                scipy.io.wavfile.write(
+                    audio_filename,
+                    rate=sampling_rate,
+                    data=audio_values[0, 0].numpy()
+                )
                 st.success("Audio successfully generated!")
                 st.audio(audio_filename)
             except Exception as e:
                 st.error(f"Error while generating audio: {e}")
+# ---------------------------------------------------------------------
 # Footer Section
+# ---------------------------------------------------------------------
 st.markdown("---")
+st.markdown(
+    "<div class='footer-note'>"
+    "✅ Built with Llama 2 & MusicGen · "
+    "Created for radio imaging producers · "
+    "Feedback welcome at <a href='https://bilsimaging.com' target='_blank'>Bilsimaging</a>!"
+    "</div>",
+    unsafe_allow_html=True
+)
+# Hide Streamlit's default menu and footer if you wish
 st.markdown("<style>#MainMenu {visibility: hidden;} footer {visibility: hidden;}</style>", unsafe_allow_html=True)