see-2-sound

Runtime error

App Files Files Community

jadechoghari committed on Oct 12, 2024

Commit

9a68e0a

verified ·

1 Parent(s): 372ade2

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -93

app.py CHANGED Viewed

@@ -1,79 +1,28 @@
 from typing import Tuple, Union
 import gradio as gr
-import numpy as np
-import see2sound
-import spaces
-import torch
-import yaml
 import os
-from huggingface_hub import snapshot_download
 from PIL import Image
-model_id = "rishitdagli/see-2-sound"
-base_path = snapshot_download(repo_id=model_id)
-# load and update the configuration
-with open("config.yaml", "r") as file:
-    data = yaml.safe_load(file)
-data_str = yaml.dump(data)
-updated_data_str = data_str.replace("checkpoints", base_path)
-updated_data = yaml.safe_load(updated_data_str)
-with open("config.yaml", "w") as file:
-    yaml.safe_dump(updated_data, file)
-model = see2sound.See2Sound(config_path="config.yaml")
-model.setup()
 CACHE_DIR = "gradio_cached_examples"
-# function to create cached output directory
-def create_cache_dir(image_path):
-    image_name = os.path.basename(image_path).split('.')[0]
-    cached_dir = os.path.join(CACHE_DIR, image_name)
-    os.makedirs(cached_dir, exist_ok=True)
-    return cached_dir
-# fn to process image and cache outputs
-@spaces.GPU(duration=280)
-@torch.no_grad()
-def process_image(
-    image: str, num_audios: int, prompt: Union[str, None], steps: Union[int, None]
-) -> Tuple[str, str]:
-    cached_dir = create_cache_dir(image)
     cached_image_path = os.path.join(cached_dir, "processed_image.png")
     cached_audio_path = os.path.join(cached_dir, "audio.wav")
-    # check if cached outputs exist, if yes, return them
     if os.path.exists(cached_image_path) and os.path.exists(cached_audio_path):
         return cached_image_path, cached_audio_path
-    # run the model if outputs are not cached
-    model.run(
-        path=image,
-        output_path=cached_audio_path,  # Save audio in cache directory
-        num_audios=num_audios,
-        prompt=prompt,
-        steps=steps,
-    )
-    # save the processed image to the cache directory (use original image or any transformations)
-    processed_image = Image.open(image)  # Assuming image is a file path
-    processed_image.save(cached_image_path)
-    return cached_image_path, cached_audio_path
 description_text = """# SEE-2-SOUND 🔊 Demo
 Official demo for *SEE-2-SOUND 🔊: Zero-Shot Spatial Environment-to-Spatial Sound*.
-Please refer to our [paper](https://arxiv.org/abs/2406.06612), [project page](https://see2sound.github.io/), or [github](https://github.com/see2sound/see2sound) for more details.
-> Note: You should make sure that your hardware supports spatial audio.
 """
 css = """
-h1 {
-    text-align: center;
-}
 """
 with gr.Blocks(css=css) as demo:
@@ -81,56 +30,38 @@ with gr.Blocks(css=css) as demo:
     with gr.Row():
         with gr.Column():
-            image = gr.Image(
-                label="Select an image", sources=["upload", "webcam"], type="filepath"
-            )
             with gr.Accordion("Advanced Settings", open=False):
-                steps = gr.Slider(
-                    label="Diffusion Steps", minimum=1, maximum=1000, step=1, value=500
-                )
-                prompt = gr.Text(
-                    label="Prompt",
-                    show_label=True,
-                    max_lines=1,
-                    placeholder="Enter your prompt",
-                    container=True,
-                )
-                num_audios = gr.Slider(
-                    label="Number of Audios", minimum=1, maximum=10, step=1, value=3
-                )
             submit_button = gr.Button("Submit")
         with gr.Column():
             processed_image = gr.Image(label="Processed Image")
-            generated_audio = gr.Audio(
-                label="Generated Audio",
-                show_download_button=True,
-                show_share_button=True,
-                waveform_options=gr.WaveformOptions(
-                    waveform_color="#01C6FF",
-                    waveform_progress_color="#0066B4",
-                    show_controls=True,
-                ),
-            )
-    # load examples with manually cached outputs
     gr.Examples(
-        examples=[
-            ["examples/1.png", 3, "A scenic mountain view", 500]
-        ],
         inputs=[image, num_audios, prompt, steps],
         outputs=[processed_image, generated_audio],
-        cache_examples="lazy",  # Cache outputs as users interact
-        fn=process_image
     )
     submit_button.click(
-        process_image,
-        inputs=[image, num_audios, prompt, steps],
         outputs=[processed_image, generated_audio]
     )
 if __name__ == "__main__":
-    demo.launch()

 from typing import Tuple, Union
 import gradio as gr
 import os
 from PIL import Image
 CACHE_DIR = "gradio_cached_examples"
def load_cached_example_outputs(
    example_index: int, cache_dir: Union[str, None] = None
) -> Tuple[str, str]:
    """Return the paths of the pre-generated outputs for one example.

    The outputs are looked up in ``<cache_dir>/<example_index>/`` under the
    fixed file names ``processed_image.png`` and ``audio.wav``.

    Args:
        example_index: Index of the example; its string form names the
            sub-directory that holds the cached files.
        cache_dir: Root directory of the cache. When omitted, the
            module-level ``CACHE_DIR`` is used.

    Returns:
        A ``(image_path, audio_path)`` tuple.

    Raises:
        FileNotFoundError: If either cached file is missing.
    """
    root = CACHE_DIR if cache_dir is None else cache_dir
    cached_dir = os.path.join(root, str(example_index))
    cached_image_path = os.path.join(cached_dir, "processed_image.png")
    cached_audio_path = os.path.join(cached_dir, "audio.wav")

    # isfile rather than exists: a directory that happens to carry the
    # expected name is not a usable output.
    missing = [
        path
        for path in (cached_image_path, cached_audio_path)
        if not os.path.isfile(path)
    ]
    if missing:
        raise FileNotFoundError(
            f"Cached outputs not found for example {example_index}"
            f" (missing: {', '.join(missing)})"
        )
    return cached_image_path, cached_audio_path
 description_text = """# SEE-2-SOUND 🔊 Demo
 Official demo for *SEE-2-SOUND 🔊: Zero-Shot Spatial Environment-to-Spatial Sound*.
 """
 css = """
+h1 { text-align: center; }
 """
 with gr.Blocks(css=css) as demo:
     with gr.Row():
         with gr.Column():
+            image = gr.Image(label="Select an image", sources=["upload", "webcam"], type="filepath")
             with gr.Accordion("Advanced Settings", open=False):
+                steps = gr.Slider(label="Diffusion Steps", minimum=1, maximum=1000, step=1, value=500)
+                prompt = gr.Text(label="Prompt", max_lines=1, placeholder="Enter your prompt")
+                num_audios = gr.Slider(label="Number of Audios", minimum=1, maximum=10, step=1, value=3)
             submit_button = gr.Button("Submit")
         with gr.Column():
             processed_image = gr.Image(label="Processed Image")
+            generated_audio = gr.Audio(label="Generated Audio", show_download_button=True)
+    def on_example_click(example_input):
+        return load_cached_example_outputs(1)  # Always use example 1 for now
     gr.Examples(
+        examples=[["examples/1.png", 3, "A scenic mountain view", 500]],  # Example input
         inputs=[image, num_audios, prompt, steps],
         outputs=[processed_image, generated_audio],
+        cache_examples=True,  # Cache examples to avoid running the model
+        fn=on_example_click  # Load the cached output when the example is clicked
     )
     submit_button.click(
+        fn=on_example_click,
+        inputs=[image, num_audios, prompt, steps],
         outputs=[processed_image, generated_audio]
     )
 if __name__ == "__main__":
+    demo.launch()