File size: 2,297 Bytes
4362f0a
 
 
 
 
3c12e7b
 
4362f0a
 
 
 
 
 
 
 
4e5bd0a
4362f0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Stable Diffusion 3 Inpaint Pipeline

| input image | input mask image | output |
|:-------------------------:|:-------------------------:|:-------------------------:|
|<img src="./overture-creations-5sI6fQgYIuo.png" width = "400" /> | <img src="./overture-creations-5sI6fQgYIuo_mask.png" width = "400" /> | <img src="./overture-creations-5sI6fQgYIuo_output.jpg" width = "400" /> |
|<img src="./overture-creations-5sI6fQgYIuo.png" width = "400" /> | <img src="./overture-creations-5sI6fQgYIuo_mask.png" width = "400" /> | <img src="./overture-creations-5sI6fQgYIuo_tiger.jpg" width = "400" /> |
|<img src="./overture-creations-5sI6fQgYIuo.png" width = "400" /> | <img src="./overture-creations-5sI6fQgYIuo_mask.png" width = "400" /> | <img src="./overture-creations-5sI6fQgYIuo_panda.jpg" width = "400" /> |

**Please ensure that your installed `diffusers` version is >= 0.29.1.**

# Demo
```python

import torch

from torchvision import transforms



from pipeline_stable_diffusion_3_inpaint import StableDiffusion3InpaintPipeline

from diffusers.utils import load_image



def preprocess_image(image):
    """Convert a PIL image into a batched tensor on the CUDA device.

    The image is converted to RGB, center-cropped so that both sides are
    multiples of 64, turned into a tensor with ``ToTensor`` and then rescaled
    with ``x * 2 - 1``. A leading batch dimension is added before the tensor
    is moved to ``"cuda"``.

    Args:
        image: A PIL image (anything exposing ``convert`` and ``size``).

    Returns:
        The preprocessed tensor with a leading batch dimension, on the GPU.
    """
    rgb = image.convert("RGB")
    # PIL reports size as (width, height); CenterCrop expects (height, width).
    width, height = rgb.size
    crop = transforms.CenterCrop((height // 64 * 64, width // 64 * 64))
    tensor = transforms.ToTensor()(crop(rgb))
    # Shift the ToTensor output from [0, 1] to [-1, 1].
    tensor = tensor * 2 - 1
    return tensor.unsqueeze(0).to("cuda")



def preprocess_mask(mask):
    """Convert a PIL mask image into a single-channel tensor on the CUDA device.

    The mask is converted to 8-bit grayscale (mode ``"L"``), center-cropped so
    that both sides are multiples of 64, and turned into a tensor with
    ``ToTensor``. No batch dimension is added and the values are not rescaled.

    Args:
        mask: A PIL image (anything exposing ``convert`` and ``size``).

    Returns:
        The mask tensor, moved to ``"cuda"``.
    """
    gray = mask.convert("L")
    # PIL reports size as (width, height); CenterCrop expects (height, width).
    width, height = gray.size
    crop = transforms.CenterCrop((height // 64 * 64, width // 64 * 64))
    return transforms.ToTensor()(crop(gray)).to("cuda")



# Load the SD3 medium checkpoint in half precision, then move the whole
# pipeline onto the GPU.
pipe = StableDiffusion3InpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")



# Text prompt describing the content to paint into the masked region.
prompt = "Face of a yellow cat, high resolution, sitting on a park bench"

# Load the source picture and turn it into the tensor the pipeline receives.
source_image = load_image("./overture-creations-5sI6fQgYIuo.png")
source = preprocess_image(source_image)

# Load the matching mask and preprocess it the same way (single channel).
mask_picture = load_image("./overture-creations-5sI6fQgYIuo_mask.png")
mask = preprocess_mask(mask_picture)



# Run the inpainting pipeline. The mask is inverted (1 - mask) before being
# passed in.
# NOTE(review): presumably the pipeline's mask convention is the opposite of
# the mask file's; confirm against the pipeline implementation.
result = pipe(
    prompt=prompt,
    image=source,
    mask_image=1 - mask,
    height=1024,
    width=1024,
    num_inference_steps=28,
    guidance_scale=7.0,
    strength=0.6,
)

# Keep the first generated image and write it to disk.
image = result.images[0]
image.save("output.png")

```