Spaces:

fffiloni
/

whisper-to-stable-diffusion

Paused

App Files Files Community

fffiloni committed on Sep 22, 2022

Commit

6e00cc0

•

1 Parent(s): a3d88bf

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -3

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import gradio as gr
 import whisper
 from PIL import Image
@@ -9,7 +10,7 @@ from diffusers import StableDiffusionPipeline
 whisper_model = whisper.load_model("small")
-device="cpu"
 pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=MY_SECRET_TOKEN)
 pipe.to(device)
@@ -50,7 +51,7 @@ gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="galler
 title="Whisper to Stable Diffusion"
 description="""
 <p style='text-align: center;'>
-This demo is running on CPU. Offered by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.whisper-to-stable-diffusion' style='display: inline-block' /><br />
 Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.<br />
 Your audio will be translated to English through OpenAI's Whisper, then sent as a prompt to Stable Diffusion.
 Try it in French ! ;)<br />
@@ -61,7 +62,7 @@ Try it in French ! ;)<br />
 article="""
 <p style='text-align: center;'>—<br />
 Whisper is a general-purpose speech recognition model. <br />
-It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.<br />
 Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
 </p>
 """

 import gradio as gr
+import torch
 import whisper
 from PIL import Image
 whisper_model = whisper.load_model("small")
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=MY_SECRET_TOKEN)
 pipe.to(device)
 title="Whisper to Stable Diffusion"
 description="""
 <p style='text-align: center;'>
+This demo is running on CPU 🐢. Offered by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.whisper-to-stable-diffusion' style='display: inline-block' /><br />
 Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.<br />
 Your audio will be translated to English through OpenAI's Whisper, then sent as a prompt to Stable Diffusion.
 Try it in French ! ;)<br />
 article="""
 <p style='text-align: center;'>—<br />
 Whisper is a general-purpose speech recognition model. <br />
+It is trained on a large dataset of diverse audio and is also a multi-task model that can perform<br />multilingual speech recognition as well as speech translation and language identification.<br />
 Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
 </p>
 """