Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
import whisper
|
3 |
from PIL import Image
|
4 |
|
@@ -9,7 +10,7 @@ from diffusers import StableDiffusionPipeline
|
|
9 |
|
10 |
whisper_model = whisper.load_model("small")
|
11 |
|
12 |
-
device="cpu"
|
13 |
|
14 |
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=MY_SECRET_TOKEN)
|
15 |
pipe.to(device)
|
@@ -50,7 +51,7 @@ gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="galler
|
|
50 |
title="Whisper to Stable Diffusion"
|
51 |
description="""
|
52 |
<p style='text-align: center;'>
|
53 |
-
This demo is running on CPU
|
54 |
Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.<br />
|
55 |
Your audio will be translated to English through OpenAI's Whisper, then sent as a prompt to Stable Diffusion.
|
56 |
Try it in French ! ;)<br />
|
@@ -61,7 +62,7 @@ Try it in French ! ;)<br />
|
|
61 |
article="""
|
62 |
<p style='text-align: center;'>—<br />
|
63 |
Whisper is a general-purpose speech recognition model. <br />
|
64 |
-
It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.<br />
|
65 |
Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
|
66 |
</p>
|
67 |
"""
|
|
|
1 |
import gradio as gr
|
2 |
+
import torch
|
3 |
import whisper
|
4 |
from PIL import Image
|
5 |
|
|
|
10 |
|
11 |
whisper_model = whisper.load_model("small")
|
12 |
|
13 |
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
14 |
|
15 |
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=MY_SECRET_TOKEN)
|
16 |
pipe.to(device)
|
|
|
51 |
title="Whisper to Stable Diffusion"
|
52 |
description="""
|
53 |
<p style='text-align: center;'>
|
54 |
+
This demo is running on CPU 🐢. Offered by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.whisper-to-stable-diffusion' style='display: inline-block' /><br />
|
55 |
Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.<br />
|
56 |
Your audio will be translated to English through OpenAI's Whisper, then sent as a prompt to Stable Diffusion.
|
57 |
Try it in French ! ;)<br />
|
|
|
62 |
article="""
|
63 |
<p style='text-align: center;'>—<br />
|
64 |
Whisper is a general-purpose speech recognition model. <br />
|
65 |
+
It is trained on a large dataset of diverse audio and is also a multi-task model that can perform<br />multilingual speech recognition as well as speech translation and language identification.<br />
|
66 |
Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
|
67 |
</p>
|
68 |
"""
|