blanchon committed
Commit 6e81bdd · 1 Parent(s): bf9e848

Add hiresfix

Files changed (1)
  1. app.py +156 -47

app.py CHANGED
@@ -20,6 +20,8 @@ SYSTEM_PROMPT = r"""This two-panel split-frame image showcases a furniture in as
 [LEFT] standalone product shot image the furniture on a white background.
 [RIGHT] integrated example within a room scene."""
 
+MASK_CONTEXT_PADDING = 16 * 8
+
 if not torch.cuda.is_available():
 
     def _dummy_pipe(image: Image.Image, *args, **kwargs):  # noqa: ARG001
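The new `MASK_CONTEXT_PADDING` constant is `16 * 8 = 128` pixels of context kept around the drawn mask, a multiple of 16 so the padded crop stays friendly to the VAE. A minimal stand-alone sketch of how such a constant expands a mask bounding box (the `pad_bbox` helper is hypothetical, not part of this commit):

```python
from PIL import Image

MASK_CONTEXT_PADDING = 16 * 8  # 128 px of context, a multiple of 16


def pad_bbox(
    bbox: tuple[int, int, int, int], width: int, height: int
) -> tuple[int, int, int, int]:
    # Expand the bbox by the padding on every side, clamped to the image bounds
    x_min, y_min, x_max, y_max = bbox
    return (
        max(x_min - MASK_CONTEXT_PADDING, 0),
        max(y_min - MASK_CONTEXT_PADDING, 0),
        min(x_max + MASK_CONTEXT_PADDING, width),
        min(y_max + MASK_CONTEXT_PADDING, height),
    )


mask = Image.new("RGBA", (1024, 768))
mask.paste((255, 255, 255, 255), (400, 300, 500, 380))  # simulated brush stroke
print(pad_bbox(mask.getbbox(alpha_only=False), mask.width, mask.height))
# (272, 172, 628, 508)
```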
 
@@ -78,68 +80,135 @@ def make_example(image_path: Path, mask_path: Path) -> EditorValue:
     }
 
 
+def remove_padding(image, original_size):
+    # Get current dimensions
+    padded_width, padded_height = image.size
+    original_width, original_height = original_size
+
+    # Calculate the centered cropping box
+    left = (padded_width - original_width) // 2
+    top = (padded_height - original_height) // 2
+    right = left + original_width
+    bottom = top + original_height
+
+    # Crop back to the original size
+    return image.crop((left, top, right, bottom))
+
+
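A quick round trip of the `remove_padding` helper added above: pad a 300×200 image to a centered 300×300 square with `ImageOps.pad` (the same call this commit uses elsewhere), then recover the original size. Sketch only; it re-declares the helper so it runs on its own:

```python
from PIL import Image, ImageOps


def remove_padding(image, original_size):
    # Crop the centered original region back out of a padded square
    padded_width, padded_height = image.size
    original_width, original_height = original_size
    left = (padded_width - original_width) // 2
    top = (padded_height - original_height) // 2
    return image.crop((left, top, left + original_width, top + original_height))


original = Image.new("RGB", (300, 200), (120, 40, 40))
padded = ImageOps.pad(original, (300, 300), color=(255, 255, 255))
print(remove_padding(padded, original.size).size)  # (300, 200)
```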
 @spaces.GPU(duration=150)
 def infer(
-    furniture_image: Image.Image,
-    room_image: EditorValue,
-    prompt: str = "",
+    furniture_image_input: Image.Image,
+    room_image_input: EditorValue,
+    furniture_prompt: str = "",
     seed: int = 42,
     randomize_seed: bool = False,
     guidance_scale: float = 3.5,
     num_inference_steps: int = 20,
     max_dimension: int = 720,
+    num_images_per_prompt: int = 2,
     progress: gr.Progress = gr.Progress(track_tqdm=True),  # noqa: ARG001, B008
 ):
     # Ensure max_dimension is a multiple of 16 (for the VAE)
     max_dimension = (max_dimension // 16) * 16
 
-    _room_image = room_image["background"]
-    if _room_image is None:
+    room_image = room_image_input["background"]
+    if room_image is None:
         msg = "Room image is required"
         raise ValueError(msg)
-    _room_image = cast("Image.Image", _room_image)
-    _room_image = ImageOps.fit(
-        _room_image,
+    room_image = cast("Image.Image", room_image)
+
+    room_mask = room_image_input["layers"][0]
+    if room_mask is None:
+        msg = "Room mask is required"
+        raise ValueError(msg)
+    room_mask = cast("Image.Image", room_mask)
+
+    mask_bbox = room_mask.getbbox(alpha_only=False)
+    if mask_bbox is None:
+        msg = "Room mask is empty, draw the area to repaint first"
+        raise ValueError(msg)
+    mask_bbox_x_min, mask_bbox_y_min, mask_bbox_x_max, mask_bbox_y_max = mask_bbox
+    # Expand the bbox by MASK_CONTEXT_PADDING (128 px) of context,
+    # clamped to the mask bounds
+    mask_bbox_x_min = max(mask_bbox_x_min - MASK_CONTEXT_PADDING, 0)
+    mask_bbox_y_min = max(mask_bbox_y_min - MASK_CONTEXT_PADDING, 0)
+    mask_bbox_x_max = min(mask_bbox_x_max + MASK_CONTEXT_PADDING, room_mask.width)
+    mask_bbox_y_max = min(mask_bbox_y_max + MASK_CONTEXT_PADDING, room_mask.height)
+
+    bbox_longest_side = max(
+        mask_bbox_x_max - mask_bbox_x_min,
+        mask_bbox_y_max - mask_bbox_y_min,
+    )
+
+    room_image_cropped = room_image.crop((
+        mask_bbox_x_min,
+        mask_bbox_y_min,
+        mask_bbox_x_max,
+        mask_bbox_y_max,
+    ))
+    room_image_cropped = ImageOps.pad(
+        room_image_cropped,
+        (bbox_longest_side, bbox_longest_side),
+        # White padding
+        color=(255, 255, 255),
+        centering=(0.5, 0.5),
+    )
+    room_image_cropped = ImageOps.fit(
+        room_image_cropped,
         (max_dimension, max_dimension),
         method=Image.Resampling.LANCZOS,
         centering=(0.5, 0.5),
     )
 
-    _room_mask = room_image["layers"][0]
-    if _room_mask is None:
-        msg = "Room mask is required"
-        raise ValueError(msg)
-    _room_mask = cast("Image.Image", _room_mask)
-    _room_mask = ImageOps.fit(
-        _room_mask,
+    room_mask_cropped = room_mask.crop((
+        mask_bbox_x_min,
+        mask_bbox_y_min,
+        mask_bbox_x_max,
+        mask_bbox_y_max,
+    ))
+    # room_mask_cropped.save("room_mask_croppedv1.png")
+    room_mask_cropped = ImageOps.pad(
+        room_mask_cropped,
+        (max_dimension, max_dimension),
+        # White padding
+        color=(255, 255, 255),
+        centering=(0.5, 0.5),
+    )
+    room_mask_cropped = ImageOps.fit(
+        room_mask_cropped,
         (max_dimension, max_dimension),
         method=Image.Resampling.LANCZOS,
         centering=(0.5, 0.5),
     )
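The crop → pad-to-square → fit sequence above is the heart of the hires fix: padding the crop to its longest side first preserves its aspect ratio, so the final LANCZOS fit to the model resolution never distorts it. A stand-alone sketch with illustrative sizes:

```python
from PIL import Image, ImageOps

max_dimension = 720
room = Image.new("RGB", (1280, 960), (200, 200, 200))
bbox = (272, 172, 628, 508)  # illustrative padded mask bbox
longest = max(bbox[2] - bbox[0], bbox[3] - bbox[1])  # 356

crop = room.crop(bbox)  # 356 x 336
square = ImageOps.pad(crop, (longest, longest), color=(255, 255, 255))  # 356 x 356
model_input = ImageOps.fit(
    square, (max_dimension, max_dimension), method=Image.Resampling.LANCZOS
)
print(model_input.size)  # (720, 720)
```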
 
+    # room_image_cropped.save("room_image_cropped.png")
+    # room_mask_cropped.save("room_mask_cropped.png")
+
+    # _room_image = ImageOps.fit(
+    #     _room_image,
+    #     (max_dimension, max_dimension),
+    #     method=Image.Resampling.LANCZOS,
+    #     centering=(0.5, 0.5),
+    # )
     # _room_image.save("room_image.png")
     # _room_mask_with_white_background = Image.new(
     #     "RGB", _room_mask.size, (255, 255, 255)
     # )
     # _room_mask_with_white_background.paste(_room_mask, (0, 0), _room_mask)
     # _room_mask_with_white_background.save("room_mask.png")
 
-    furniture_image = ImageOps.fit(
-        furniture_image,
+    furniture_image = ImageOps.pad(
+        furniture_image_input,
         (max_dimension, max_dimension),
-        method=Image.Resampling.LANCZOS,
+        # White padding
+        color=(255, 255, 255),
         centering=(0.5, 0.5),
     )
-    _furniture_image = Image.new(
-        "RGB",
-        (max_dimension, max_dimension),
-        (255, 255, 255),
-    )
-    _furniture_image.paste(furniture_image, (0, 0))
 
-    # _furniture_image.save("furniture_image.png")
+    # furniture_image.save("furniture_image.png")
 
-    _furniture_mask = Image.new("RGB", (max_dimension, max_dimension), (255, 255, 255))
+    furniture_mask = Image.new("RGB", (max_dimension, max_dimension), (255, 255, 255))
 
     image = Image.new(
         "RGB",
@@ -147,16 +216,16 @@ def infer(
         (255, 255, 255),
     )
     # Paste on the center of the image
-    image.paste(_furniture_image, (0, 0))
-    image.paste(_room_image, (max_dimension, 0))
+    image.paste(furniture_image, (0, 0))
+    image.paste(room_image_cropped, (max_dimension, 0))
 
     mask = Image.new(
         "RGB",
         (max_dimension * 2, max_dimension),
         (255, 255, 255),
     )
-    mask.paste(_furniture_mask, (0, 0))
-    mask.paste(_room_mask, (max_dimension, 0), _room_mask)
+    mask.paste(furniture_mask, (0, 0))
+    mask.paste(room_mask_cropped, (max_dimension, 0), room_mask_cropped)
     # Invert the mask
     mask = ImageOps.invert(mask)
     # Blur the mask
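What the composition above builds, reduced to dummy images: the furniture reference fills the [LEFT] panel, the room crop the [RIGHT] panel, and the mask is white only where the model may repaint. This sketch assumes dark brush strokes, so that `ImageOps.invert` turns the drawn region white and everything else black:

```python
from PIL import Image, ImageOps

max_dimension = 720
furniture = Image.new("RGB", (max_dimension, max_dimension), (230, 230, 230))
room_crop = Image.new("RGB", (max_dimension, max_dimension), (180, 170, 160))

image = Image.new("RGB", (max_dimension * 2, max_dimension), (255, 255, 255))
image.paste(furniture, (0, 0))
image.paste(room_crop, (max_dimension, 0))

mask = Image.new("RGB", (max_dimension * 2, max_dimension), (255, 255, 255))
drawn = Image.new("RGB", (max_dimension, max_dimension), (255, 255, 255))
drawn.paste((0, 0, 0), (200, 200, 500, 500))  # simulated brush stroke
mask.paste(drawn, (max_dimension, 0))
mask = ImageOps.invert(mask)  # white = repaint, black = keep
```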
 
@@ -167,7 +236,11 @@ def infer(
     if randomize_seed:
         seed = secrets.randbelow(MAX_SEED)
 
-    prompt = prompt + ".\n" + SYSTEM_PROMPT if prompt else SYSTEM_PROMPT
+    prompt = (
+        furniture_prompt + ".\n" + SYSTEM_PROMPT if furniture_prompt else SYSTEM_PROMPT
+    )
+    # image.save("image.png")
+    # mask.save("mask.png")
     results_images = pipe(
         prompt=prompt,
         image=image,
@@ -176,16 +249,36 @@ def infer(
         width=max_dimension * 2,
         num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
-        num_images_per_prompt=2,
+        num_images_per_prompt=num_images_per_prompt,
         generator=torch.Generator("cpu").manual_seed(seed),
     )["images"]
 
-    cropped_images = [
-        image.crop((max_dimension, 0, max_dimension * 2, max_dimension))
-        for image in results_images
-    ]
-
-    return cropped_images, seed
+    final_images = []
+    for image in results_images:
+        final_image = room_image.copy()
+
+        # Take the generated [RIGHT] panel and downscale it
+        # back to bbox_longest_side
+        image_generated = image.crop((
+            max_dimension,
+            0,
+            max_dimension * 2,
+            max_dimension,
+        ))
+        image_generated = image_generated.resize(
+            (bbox_longest_side, bbox_longest_side)
+        )
+        # Crop back to the bbox (remove the padding)
+        image_generated = remove_padding(
+            image_generated,
+            (
+                mask_bbox_x_max - mask_bbox_x_min,
+                mask_bbox_y_max - mask_bbox_y_min,
+            ),
+        )
+        # Paste the generated crop back onto the full room image,
+        # at the same bbox it was cropped from
+        final_image.paste(image_generated, (mask_bbox_x_min, mask_bbox_y_min))
+        final_images.append(final_image)
+
+    return final_images, seed
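The loop above is the other half of the hires fix: instead of returning the low-resolution right panel, it maps the generated pixels back into the full-resolution room image. A hypothetical `paste_back` helper distilled from that loop (not part of the commit):

```python
from PIL import Image


def paste_back(
    generated: Image.Image,  # the (max_dimension * 2, max_dimension) panel pair
    room: Image.Image,  # the original full-resolution room image
    bbox: tuple[int, int, int, int],  # padded mask bbox in room coordinates
    max_dimension: int,
) -> Image.Image:
    x_min, y_min, x_max, y_max = bbox
    longest = max(x_max - x_min, y_max - y_min)

    panel = generated.crop((max_dimension, 0, max_dimension * 2, max_dimension))
    panel = panel.resize((longest, longest))  # undo the fit
    left = (longest - (x_max - x_min)) // 2  # undo the centered pad
    top = (longest - (y_max - y_min)) // 2
    panel = panel.crop((left, top, left + (x_max - x_min), top + (y_max - y_min)))

    result = room.copy()
    result.paste(panel, (x_min, y_min))  # drop the crop back where it came from
    return result
```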
 
 
 
 intro_markdown = r"""
@@ -241,7 +334,7 @@ with gr.Blocks(css=css) as demo:
                 """,
                 max_height=50,
             )
-            furniture_image = gr.Image(
+            furniture_image_input = gr.Image(
                 label="Furniture Image",
                 type="pil",
                 sources=["upload"],
@@ -254,7 +347,7 @@ with gr.Blocks(css=css) as demo:
                     EXAMPLES_DIR / "2" / "furniture_image.png",
                 ],
                 examples_per_page=12,
-                inputs=[furniture_image],
+                inputs=[furniture_image_input],
             )
         with gr.Column(elem_id="col-mid"):
             gr.HTML(
@@ -267,7 +360,7 @@ with gr.Blocks(css=css) as demo:
                 """,
                 max_height=50,
             )
-            room_image = gr.ImageEditor(
+            room_image_input = gr.ImageEditor(
                 label="Room Image - Draw mask for inpainting",
                 type="pil",
                 sources=["upload"],
@@ -288,7 +381,7 @@ with gr.Blocks(css=css) as demo:
                         EXAMPLES_DIR / "2" / "room_mask.png",
                     ),
                 ],
-                inputs=[room_image],
+                inputs=[room_image_input],
             )
         with gr.Column(elem_id="col-right"):
             gr.HTML(
@@ -309,6 +402,12 @@ with gr.Blocks(css=css) as demo:
                 height=500,
             )
             run_button = gr.Button("Run")
+
+            # Reset the results when the run button is clicked
+            run_button.click(
+                outputs=results,
+                fn=lambda: None,
+            )
     with gr.Accordion("Advanced Settings", open=False):
         seed = gr.Slider(
             label="Seed",
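The new click handler relies on Gradio's convention that an event handler returning None resets its output component, so stale images disappear the moment a new run starts. A minimal sketch of the pattern:

```python
import gradio as gr

with gr.Blocks() as demo:
    results = gr.Gallery(label="Results")
    run_button = gr.Button("Run")
    # Returning None clears the gallery before the real inference event fires
    run_button.click(fn=lambda: None, outputs=results)
```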
 
@@ -334,6 +433,14 @@ with gr.Blocks(css=css) as demo:
             value=720,
         )
 
+        num_images_per_prompt = gr.Slider(
+            label="Number of images per prompt",
+            minimum=1,
+            maximum=4,
+            step=1,
+            value=2,
+        )
+
         guidance_scale = gr.Slider(
             label="Guidance Scale",
             minimum=1,
@@ -378,21 +485,23 @@ with gr.Blocks(css=css) as demo:
                 ),
             ],
         ],
-        inputs=[furniture_image, room_image],
+        inputs=[furniture_image_input, room_image_input],
         label=None,
     )
+
     gr.on(
-        triggers=[run_button.click, furniture_prompt.submit],
+        triggers=[run_button.click],
         fn=infer,
         inputs=[
-            furniture_image,
-            room_image,
+            furniture_image_input,
+            room_image_input,
             furniture_prompt,
             seed,
             randomize_seed,
             guidance_scale,
             num_inference_steps,
             max_dimension,
+            num_images_per_prompt,
         ],
         outputs=[results, seed],
     )
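Dropping furniture_prompt.submit from the triggers list means only the Run button can start a GPU job; pressing Enter in the prompt box no longer launches inference. A minimal sketch of gr.on with a single trigger:

```python
import gradio as gr

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    run = gr.Button("Run")
    out = gr.Textbox(label="Echo")
    # One explicit trigger: only the button click fires the handler
    gr.on(triggers=[run.click], fn=lambda p: p, inputs=[prompt], outputs=[out])
```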