Spaces:
Runtime error
Runtime error
File size: 10,990 Bytes
cbba703 374a0d6 cbba703 199d28b cbba703 02df5d4 cbba703 6c4be6b 02df5d4 3a4042f 6c4be6b 3417b69 3a4042f 02df5d4 49dc097 6c4be6b 986ef15 31f6f75 49dc097 cbba703 49dc097 cbba703 986ef15 b202164 ae6071d 49dc097 cbba703 f41534f cbba703 49dc097 cbba703 6c4be6b 48ec70e 986ef15 b202164 ae6071d 6c4be6b cbba703 6c4be6b d4a60e3 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b cbba703 6c4be6b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
import gradio as gr
from io import BytesIO
import requests
import PIL
from PIL import Image
import numpy as np
import os
import uuid
import torch
from torch import autocast
import cv2
from matplotlib import pyplot as plt
from inpainting import StableDiffusionInpaintingPipeline
from torchvision import transforms
from clipseg.models.clipseg import CLIPDensePredT
# Value handed to `use_auth_token` when the inpainting pipeline is loaded:
# the API_TOKEN environment variable, or True when it is unset or empty.
auth_token = os.getenv("API_TOKEN") or True
def download_image(url, timeout=30):
    """Fetch an image over HTTP and return it as an RGB PIL image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The downloaded image, converted to RGB mode.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Report HTTP errors here instead of letting an error page reach the
    # image decoder and fail with an unrelated-looking message.
    response.raise_for_status()
    return PIL.Image.open(BytesIO(response.content)).convert("RGB")
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("The model will be running on :: ", device, " ~device")

# Stable Diffusion inpainting pipeline. Half precision is only requested on
# CUDA; on CPU the weights are loaded as float32 because float16 inference is
# not generally supported there.
pipe = StableDiffusionInpaintingPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
    use_auth_token=auth_token,
).to(device)

# CLIPSeg model used by predict() to turn a text description into a mask.
# NOTE(review): `strict=False` silently ignores any parameter names that do not
# match between this architecture (complex_trans_conv=True) and the checkpoint
# file — confirm that 'rd64-uni.pth' is the intended weights file for it.
model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64, complex_trans_conv=True)
model = model.to(device)
model.eval()
model.load_state_dict(
    torch.load('./clipseg/weights/rd64-uni.pth', map_location=device),
    strict=False,
)
print("Torch load(model) : ", model)

# Side length (pixels) that images and masks are resized to; alternative: 512.
imgRes = 256

# Preprocessing for the CLIPSeg input: tensor conversion, per-channel
# normalisation, then resize to imgRes x imgRes.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.Resize((imgRes, imgRes)),
])
def _mask_from_words(image, word_mask):
    """Return an RGB black/white mask of the region the model matches to *word_mask*."""
    # `transform` normalises with three-channel statistics, so force RGB first,
    # and run the input on the same device the segmentation model lives on.
    img = transform(image.convert("RGB")).unsqueeze(0).to(device)
    word_masks = [word_mask]
    with torch.no_grad():
        preds = model(img.repeat(len(word_masks), 1, 1, 1), word_masks)[0]
    # Bring the prediction back to host memory before handing it to matplotlib.
    heatmap = torch.sigmoid(preds[0][0]).cpu().numpy()

    # The heatmap is rendered to a PNG and read back so the threshold below is
    # applied to the colour-mapped rendering, as before. The file is removed
    # even if writing or reading fails.
    filename = f"{uuid.uuid4()}.png"
    try:
        plt.imsave(filename, heatmap)
        img2 = cv2.imread(filename)
    finally:
        if os.path.exists(filename):
            os.remove(filename)

    gray_image = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    _, bw_image = cv2.threshold(gray_image, 100, 255, cv2.THRESH_BINARY)
    return Image.fromarray(np.uint8(bw_image)).convert('RGB')


def predict(radio, dict, word_mask, prompt=""):
    """Inpaint the uploaded image using either a drawn or a text-derived mask.

    Args:
        radio: Selected masking mode. "draw a mask above" uses the sketched
            mask; any other value derives the mask from `word_mask`.
        dict: Mapping with the PIL images under "image" and (for the drawn
            mode) "mask". The name shadows the builtin but is kept so the
            signature stays unchanged.
        word_mask: Text describing what to mask (used in the text mode only).
        prompt: Text prompt passed to the inpainting pipeline.

    Returns:
        The first image of the pipeline's "sample" output.

    Raises:
        ValueError: If no input image was provided.
    """
    if dict is None or dict.get("image") is None:
        raise ValueError("No input image was provided.")

    source = dict["image"]
    init_image = source.convert("RGB").resize((imgRes, imgRes))
    if radio == "draw a mask above":
        mask = dict["mask"].convert("RGB").resize((imgRes, imgRes))
    else:
        mask = _mask_from_words(source, word_mask)

    # Only request CUDA autocast when CUDA is actually present.
    with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
        images = pipe(prompt=prompt, init_image=init_image, mask_image=mask, strength=0.8)["sample"]
    return images[0]
# examples = [[dict(image="init_image.png", mask="mask_image.png"), "A panda sitting on a bench"]]
# Custom stylesheet passed to gr.Blocks(css=...). The id selectors match the
# elem_id values given to the widgets (image_upload, mask_radio, word_mask);
# .footer and .acknowledgments match the classes used in the footer HTML.
# NOTE(review): the radio.change JavaScript handler toggles `display` on the
# last rule of the last stylesheet on the page; that looks like it is meant to
# be the final `#image_upload .touch-none` rule below, so keep it last —
# confirm against the rendered page.
css = '''
.container {max-width: 1150px;margin: auto;padding-top: 1.5rem}
#image_upload{min-height:400px}
#image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 400px}
#mask_radio .gr-form{background:transparent; border: none}
#word_mask{margin-top: .75em !important}
#word_mask textarea:disabled{opacity: 0.3}
.footer {margin-bottom: 45px;margin-top: 35px;text-align: center;border-bottom: 1px solid #e5e5e5}
.footer>p {font-size: .8rem; display: inline-block; padding: 0 10px;transform: translateY(10px);background: white}
.dark .footer {border-color: #303030}
.dark .footer>p {background: #0b0f19}
.acknowledgments h4{margin: 1.25em 0 .25em 0;font-weight: bold;font-size: 115%}
#image_upload .touch-none{display: flex}
'''
def swap_word_mask(radio_option):
    """Return a component update enabling the word-mask box only in text mode.

    When `radio_option` is "type what to mask below" the update makes the
    textbox interactive with the placeholder "A cat"; for any other option it
    disables the textbox and shows the placeholder "Disabled".
    """
    typing_mode = radio_option == "type what to mask below"
    hint = "A cat" if typing_mode else "Disabled"
    return gr.update(interactive=typing_mode, placeholder=hint)
# Gradio UI: header, input column (image + mask mode + prompts + Run button),
# result column, and a footer with credits and licence text.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from
# statement order; in particular, placing `word_mask` inside the "mask_radio"
# box next to the radio is an assumption — confirm against the intended layout.
image_blocks = gr.Blocks(css=css)
with image_blocks as demo:
    # Page header: logo and title.
    gr.HTML(
        """
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<svg
width="0.65em"
height="0.65em"
viewBox="0 0 115 115"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<rect width="23" height="23" fill="white"></rect>
<rect y="69" width="23" height="23" fill="white"></rect>
<rect x="23" width="23" height="23" fill="#AEAEAE"></rect>
<rect x="23" y="69" width="23" height="23" fill="#AEAEAE"></rect>
<rect x="46" width="23" height="23" fill="white"></rect>
<rect x="46" y="69" width="23" height="23" fill="white"></rect>
<rect x="69" width="23" height="23" fill="black"></rect>
<rect x="69" y="69" width="23" height="23" fill="black"></rect>
<rect x="92" width="23" height="23" fill="#D9D9D9"></rect>
<rect x="92" y="69" width="23" height="23" fill="#AEAEAE"></rect>
<rect x="115" y="46" width="23" height="23" fill="white"></rect>
<rect x="115" y="115" width="23" height="23" fill="white"></rect>
<rect x="115" y="69" width="23" height="23" fill="#D9D9D9"></rect>
<rect x="92" y="46" width="23" height="23" fill="#AEAEAE"></rect>
<rect x="92" y="115" width="23" height="23" fill="#AEAEAE"></rect>
<rect x="92" y="69" width="23" height="23" fill="white"></rect>
<rect x="69" y="46" width="23" height="23" fill="white"></rect>
<rect x="69" y="115" width="23" height="23" fill="white"></rect>
<rect x="69" y="69" width="23" height="23" fill="#D9D9D9"></rect>
<rect x="46" y="46" width="23" height="23" fill="black"></rect>
<rect x="46" y="115" width="23" height="23" fill="black"></rect>
<rect x="46" y="69" width="23" height="23" fill="black"></rect>
<rect x="23" y="46" width="23" height="23" fill="#D9D9D9"></rect>
<rect x="23" y="115" width="23" height="23" fill="#AEAEAE"></rect>
<rect x="23" y="69" width="23" height="23" fill="black"></rect>
</svg>
<h1 style="font-weight: 900; margin-bottom: 7px;">
Stable Diffusion Multi Inpainting
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Inpaint Stable Diffusion by either drawing a mask or typing what to replace
</p>
</div>
"""
    )
    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            image = gr.Image(source='upload', tool='sketch', elem_id="image_upload", type="pil", label="Upload").style(height=400)
            with gr.Box(elem_id="mask_radio").style(border=False):
                radio = gr.Radio(["draw a mask above", "type what to mask below"], value="draw a mask above", show_label=False, interactive=True).style(container=False)
                word_mask = gr.Textbox(label = "What to find in your image", interactive=False, elem_id="word_mask", placeholder="Disabled").style(container=False)
            prompt = gr.Textbox(label = 'Your prompt (what you want to add in place of what you are removing)')
            # Enable/disable the word-mask textbox to follow the selected mode.
            radio.change(fn=swap_word_mask, inputs=radio, outputs=word_mask,show_progress=False)
            # Client-side only: flip `display` on the last rule of the last
            # stylesheet between "flex" and "none" each time the mode changes.
            radio.change(None, inputs=[], outputs=image_blocks, _js = """
() => {
css_style = document.styleSheets[document.styleSheets.length - 1]
last_item = css_style.cssRules[css_style.cssRules.length - 1]
last_item.style.display = ["flex", ""].includes(last_item.style.display) ? "none" : "flex";
}""")
            btn = gr.Button("Run")
        # Right column: output.
        with gr.Column():
            result = gr.Image(label="Result")
        btn.click(fn=predict, inputs=[radio, image, word_mask, prompt], outputs=result)
    # Footer: credits, licence and bias acknowledgment.
    gr.HTML(
        """
<div class="footer">
<p>Model by <a href="https://huggingface.co/CompVis" style="text-decoration: underline;" target="_blank">CompVis</a> and <a href="https://huggingface.co/stabilityai" style="text-decoration: underline;" target="_blank">Stability AI</a> - Inpainting by <a href="https://github.com/nagolinc" style="text-decoration: underline;" target="_blank">nagolinc</a> and <a href="https://github.com/patil-suraj" style="text-decoration: underline;">patil-suraj</a>, inpainting with words by <a href="https://twitter.com/yvrjsharma/" style="text-decoration: underline;" target="_blank">@yvrjsharma</a> and <a href="https://twitter.com/1littlecoder" style="text-decoration: underline;">@1littlecoder</a> - Gradio Demo by 🤗 Hugging Face
</p>
</div>
<div class="acknowledgments">
<p><h4>LICENSE</h4>
The model is licensed with a <a href="https://huggingface.co/spaces/CompVis/stable-diffusion-license" style="text-decoration: underline;" target="_blank">CreativeML Open RAIL-M</a> license. The authors claim no rights on the outputs you generate, you are free to use them and are accountable for their use which must not go against the provisions set in this license. The license forbids you from sharing any content that violates any laws, produce any harm to a person, disseminate any personal information that would be meant for harm, spread misinformation and target vulnerable groups. For the full list of restrictions please <a href="https://huggingface.co/spaces/CompVis/stable-diffusion-license" target="_blank" style="text-decoration: underline;">read the license</a></p>
<p><h4>Biases and content acknowledgment</h4>
Despite how impressive being able to turn text into image is, beware to the fact that this model may output content that reinforces or exacerbates societal biases, as well as realistic faces, pornography and violence. The model was trained on the <a href="https://laion.ai/blog/laion-5b/" style="text-decoration: underline;" target="_blank">LAION-5B dataset</a>, which scraped non-curated image-text-pairs from the internet (the exception being the removal of illegal content) and is meant for research purposes. You can read more in the <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4" style="text-decoration: underline;" target="_blank">model card</a></p>
</div>
"""
    )
demo.launch()