Spaces:

Bils
/

AIPromoStudio

Running on Zero

App Files Files Community

Bils committed on Jan 11

Commit

cf3593c

verified ·

1 Parent(s): 6b30d40

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -79

app.py CHANGED Viewed

@@ -8,15 +8,17 @@ from transformers import (
     AutoProcessor,
     MusicgenForConditionalGeneration
 )
-import scipy.io.wavfile as wav
-# ---------------------------------------------------------------------
-# Load Llama 3 Model with Zero GPU
-# ---------------------------------------------------------------------
 def load_llama_pipeline_zero_gpu(model_id: str, token: str):
     try:
-        if not torch.cuda.is_available():
-            raise RuntimeError("ZeroGPU is not properly initialized or GPU is unavailable.")
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
@@ -27,94 +29,41 @@ def load_llama_pipeline_zero_gpu(model_id: str, token: str):
         )
         return pipeline("text-generation", model=model, tokenizer=tokenizer)
     except Exception as e:
-        return f"Error loading model: {e}"
-# ---------------------------------------------------------------------
-# Generate Radio Script
-# ---------------------------------------------------------------------
-def generate_script(user_input: str, pipeline_llama):
-    try:
-        system_prompt = (
-            "You are a top-tier radio imaging producer using Llama 3. "
-            "Take the user's concept and craft a short, creative promo script."
-        )
-        combined_prompt = f"{system_prompt}\nUser concept: {user_input}\nRefined script:"
-        result = pipeline_llama(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
-        return result[0]['generated_text'].split("Refined script:")[-1].strip()
-    except Exception as e:
-        return f"Error generating script: {e}"
-# ---------------------------------------------------------------------
-# Load MusicGen Model
-# ---------------------------------------------------------------------
-def load_musicgen_model():
-    try:
-        model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
-        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
-        return model, processor
-    except Exception as e:
-        return None, str(e)
-# ---------------------------------------------------------------------
-# Generate Audio
-# ---------------------------------------------------------------------
 def generate_audio(prompt: str, audio_length: int, mg_model, mg_processor):
     try:
         inputs = mg_processor(text=[prompt], padding=True, return_tensors="pt")
         outputs = mg_model.generate(**inputs, max_new_tokens=audio_length)
         sr = mg_model.config.audio_encoder.sampling_rate
         audio_data = outputs[0, 0].cpu().numpy()
         normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
-        output_file = "radio_jingle.wav"
-        wav.write(output_file, rate=sr, data=normalized_audio)
-        return sr, normalized_audio
-    except Exception as e:
-        return str(e)
-# ---------------------------------------------------------------------
-# Gradio Interface
-# ---------------------------------------------------------------------
-def radio_imaging_app(user_prompt, llama_model_id, hf_token, audio_length):
-    # Load Llama 3 Pipeline with Zero GPU
-    pipeline_llama = load_llama_pipeline_zero_gpu(llama_model_id, hf_token)
-    if isinstance(pipeline_llama, str):
-        return pipeline_llama, None
-    # Generate Script
-    script = generate_script(user_prompt, pipeline_llama)
-    # Load MusicGen
-    mg_model, mg_processor = load_musicgen_model()
-    if isinstance(mg_processor, str):
-        return script, mg_processor
-    # Generate Audio
-    audio_data = generate_audio(script, audio_length, mg_model, mg_processor)
-    if isinstance(audio_data, str):
-        return script, audio_data
-    return script, audio_data
-# ---------------------------------------------------------------------
-# Interface
-# ---------------------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# 🎧 AI Radio Imaging with Llama 3 + MusicGen (Zero GPU)")
-    with gr.Row():
-        user_prompt = gr.Textbox(label="Enter your promo idea", placeholder="E.g., A 15-second hype jingle for a morning talk show, fun and energetic.")
-        llama_model_id = gr.Textbox(label="Llama 3 Model ID", value="meta-llama/Meta-Llama-3-70B")
-        hf_token = gr.Textbox(label="Hugging Face Token", type="password")
-        audio_length = gr.Slider(label="Audio Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
     generate_button = gr.Button("Generate Promo Script and Audio")
     script_output = gr.Textbox(label="Generated Script")
-    audio_output = gr.Audio(label="Generated Audio", type="numpy")
-    generate_button.click(radio_imaging_app,
-                          inputs=[user_prompt, llama_model_id, hf_token, audio_length],
-                          outputs=[script_output, audio_output])
-# ---------------------------------------------------------------------
-# Launch App
-# ---------------------------------------------------------------------
 demo.launch(debug=True)

     AutoProcessor,
     MusicgenForConditionalGeneration
 )
+from scipy.io.wavfile import write
+import tempfile
+from dotenv import load_dotenv
+import spaces
+load_dotenv()
+hf_token = os.getenv("HF_TOKEN")
+@spaces.GPU(duration=120)
 def load_llama_pipeline_zero_gpu(model_id: str, token: str):
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
         )
         return pipeline("text-generation", model=model, tokenizer=tokenizer)
     except Exception as e:
+        return str(e)
+@spaces.GPU(duration=120)
 def generate_audio(prompt: str, audio_length: int, mg_model, mg_processor):
     try:
+        mg_model.to("cuda")
         inputs = mg_processor(text=[prompt], padding=True, return_tensors="pt")
         outputs = mg_model.generate(**inputs, max_new_tokens=audio_length)
+        mg_model.to("cpu")
         sr = mg_model.config.audio_encoder.sampling_rate
         audio_data = outputs[0, 0].cpu().numpy()
         normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
+            write(temp_wav.name, sr, normalized_audio)
+            return temp_wav.name
+    except Exception as e:
+        return f"Error generating audio: {e}"
 with gr.Blocks() as demo:
     gr.Markdown("# 🎧 AI Radio Imaging with Llama 3 + MusicGen (Zero GPU)")
+    user_prompt = gr.Textbox(label="Enter your promo idea", placeholder="E.g., A 15-second hype jingle for a morning talk show.")
+    llama_model_id = gr.Textbox(label="Llama 3 Model ID", value="meta-llama/Meta-Llama-3-70B")
+    hf_token = gr.Textbox(label="Hugging Face Token", type="password")
+    audio_length = gr.Slider(label="Audio Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
     generate_button = gr.Button("Generate Promo Script and Audio")
     script_output = gr.Textbox(label="Generated Script")
+    audio_output = gr.Audio(label="Generated Audio", type="filepath")
+    generate_button.click(
+        fn=lambda prompt, model_id, token, length: (prompt, None),  # Simplify for demo
+        inputs=[user_prompt, llama_model_id, hf_token, audio_length],
+        outputs=[script_output, audio_output]
+    )
 demo.launch(debug=True)