Luis Oala committed
Commit 2e0468e • 1 Parent(s): c909455
Files changed (9)
  1. README.md +3 -4
  2. README.md~ +4 -4
  3. app.py +7 -5
  4. app.py~ +12 -8
  5. notebooks/clip_guided.ipynb +1 -13
  6. notebooks/inpaint.ipynb +1 -13
  7. notebooks/text2im.ipynb +1 -13
  8. server.py +175 -0
  9. setup.py +1 -15
README.md CHANGED
@@ -1,8 +1,7 @@
 ---
-title: glide
-emoji: 🔥
-colorFrom: red
-colorTo: purple
+title: glide-test
+colorFrom: green
+colorTo: green
 sdk: gradio
 app_file: app.py
 pinned: false
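
For context, the block between the --- delimiters is the Hugging Face Spaces configuration header that controls how the Space is listed (the header must still be closed by a second ---, which lies outside this hunk). A complete header of the new file's shape would look roughly like the following sketch; note the emoji line here is purely illustrative, since this commit actually removes it:

---
title: glide-test
emoji: 🔥
colorFrom: green
colorTo: green
sdk: gradio
app_file: app.py
pinned: false
---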
README.md~ CHANGED
@@ -1,8 +1,8 @@
 ---
-title: Glide
-emoji: 🔥
-colorFrom: red
-colorTo: purple
+title: Glide Text2im
+emoji: 📊
+colorFrom: purple
+colorTo: gray
 sdk: gradio
 app_file: app.py
 pinned: false
app.py CHANGED
@@ -1,3 +1,4 @@
+
 import os
 os.system('pip install -e .')
 import gradio as gr
@@ -16,6 +17,11 @@ from glide_text2im.model_creation import (
     model_and_diffusion_defaults_upsampler
 )
 
+"""
+credit: follows the gradio glide example by valhalla https://huggingface.co/spaces/valhalla/glide-text2im
+"""
+
+
 # print("Loading models...")
 # app = FastAPI()
 
@@ -23,10 +29,6 @@ from glide_text2im.model_creation import (
 # On CPU, generating one sample may take on the order of 20 minutes.
 # On a GPU, it should be under a minute.
 
-"""
-credit: follows the gradio glide example by valhalla https://huggingface.co/spaces/valhalla/glide-text2im
-"""
-
 has_cuda = th.cuda.is_available()
 device = th.device('cpu' if not has_cuda else 'cuda')
 
@@ -188,7 +190,7 @@ description = "text conditioned image generation demo using openai's GLIDE model
 
 iface = gr.Interface(fn=sample,
                      inputs=gr.inputs.Textbox(label='enter text'),
-                     outputs=gr.outputs.Image(type="pil", label="model input + completions"),
+                     outputs=gr.outputs.Image(type="pil", label="..."),
                      title=title,
                      description=description,
                      enable_queue=True)
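
Both the old and new Interface calls use the Gradio 2.x namespaces (gr.inputs, gr.outputs) and the enable_queue constructor argument, all of which were removed in Gradio 3.x. Purely as a hedged sketch of the same wiring under the newer API (not part of this commit; sample, title, and description are the objects defined in app.py):

    import gradio as gr

    iface = gr.Interface(
        fn=sample,
        inputs=gr.Textbox(label="enter text"),            # gr.inputs.Textbox in 2.x
        outputs=gr.Image(type="pil", label="..."),        # gr.outputs.Image in 2.x
        title=title,
        description=description,
    )
    iface.queue()   # replaces the enable_queue=True constructor argument
    iface.launch()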
app.py~ CHANGED
@@ -1,3 +1,4 @@
+
 import os
 os.system('pip install -e .')
 import gradio as gr
@@ -16,6 +17,11 @@ from glide_text2im.model_creation import (
     model_and_diffusion_defaults_upsampler
 )
 
+"""
+credit: follows the gradio glide example by valhalla https://huggingface.co/spaces/valhalla/glide-text2im
+"""
+
+
 # print("Loading models...")
 # app = FastAPI()
 
@@ -23,10 +29,6 @@ from glide_text2im.model_creation import (
 # On CPU, generating one sample may take on the order of 20 minutes.
 # On a GPU, it should be under a minute.
 
-"""
-credit: follows the gradio glide example by valhalla https://huggingface.co/spaces/valhalla/glide-text2im
-"""
-
 has_cuda = th.cuda.is_available()
 device = th.device('cpu' if not has_cuda else 'cuda')
 
@@ -183,12 +185,14 @@ def to_base64(pil_image):
     pil_image.save(buffered, format="JPEG")
     return base64.b64encode(buffered.getvalue())
 
-title = "glide test"
-description = "text conditioned image generation demo using openai's GLIDE model (text-guided diffusion model) https://arxiv.org/abs/2112.10741 & https://github.com/openai/glide-text2im/. should take ~500s to run. credit to valhalla for gradio template https://huggingface.co/spaces/valhalla/."
+title = "Interactive demo: glide-text2im"
+description = "Demo for OpenAI's GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models."
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10741'>GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models</a> | <a href='https://github.com/openai/glide-text2im/'>Official Repo</a></p>"
+examples =["an oil painting of a corgi"]
 
 iface = gr.Interface(fn=sample,
-                     inputs=gr.inputs.Textbox(label='enter text'),
-                     outputs=gr.outputs.Image(type="pil", label="model input + completions"),
+                     inputs=gr.inputs.Textbox(label='What would you like to see?'),
+                     outputs=gr.outputs.Image(type="pil", label="Model input + completions"),
                      title=title,
                      description=description,
                      article=article,
notebooks/clip_guided.ipynb CHANGED
@@ -1,16 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Run this line in Colab to install the package if it is\n",
-    "# not already installed.\n",
-    "!pip install git+https://github.com/openai/glide-text2im"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -238,8 +227,7 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  },
-  "accelerator": "GPU"
+  }
  },
  "nbformat": 4,
 "nbformat_minor": 2
notebooks/inpaint.ipynb CHANGED
@@ -1,16 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Run this line in Colab to install the package if it is\n",
-    "# not already installed.\n",
-    "!pip install git+https://github.com/openai/glide-text2im"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -294,8 +283,7 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  },
-  "accelerator": "GPU"
+  }
  },
  "nbformat": 4,
 "nbformat_minor": 2
notebooks/text2im.ipynb CHANGED
@@ -1,16 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Run this line in Colab to install the package if it is\n",
-    "# not already installed.\n",
-    "!pip install git+https://github.com/openai/glide-text2im"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -243,8 +232,7 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  },
-  "accelerator": "GPU"
+  }
  },
  "nbformat": 4,
 "nbformat_minor": 2
server.py ADDED
@@ -0,0 +1,175 @@
+import base64
+from io import BytesIO
+from fastapi import FastAPI
+
+from PIL import Image
+import torch as th
+
+from glide_text2im.download import load_checkpoint
+from glide_text2im.model_creation import (
+    create_model_and_diffusion,
+    model_and_diffusion_defaults,
+    model_and_diffusion_defaults_upsampler
+)
+
+print("Loading models...")
+app = FastAPI()
+
+# This notebook supports both CPU and GPU.
+# On CPU, generating one sample may take on the order of 20 minutes.
+# On a GPU, it should be under a minute.
+
+has_cuda = th.cuda.is_available()
+device = th.device('cpu' if not has_cuda else 'cuda')
+
+# Create base model.
+options = model_and_diffusion_defaults()
+options['use_fp16'] = has_cuda
+options['timestep_respacing'] = '100' # use 100 diffusion steps for fast sampling
+model, diffusion = create_model_and_diffusion(**options)
+model.eval()
+if has_cuda:
+    model.convert_to_fp16()
+model.to(device)
+model.load_state_dict(load_checkpoint('base', device))
+print('total base parameters', sum(x.numel() for x in model.parameters()))
+
+# Create upsampler model.
+options_up = model_and_diffusion_defaults_upsampler()
+options_up['use_fp16'] = has_cuda
+options_up['timestep_respacing'] = 'fast27' # use 27 diffusion steps for very fast sampling
+model_up, diffusion_up = create_model_and_diffusion(**options_up)
+model_up.eval()
+if has_cuda:
+    model_up.convert_to_fp16()
+model_up.to(device)
+model_up.load_state_dict(load_checkpoint('upsample', device))
+print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))
+
+
+def get_images(batch: th.Tensor):
+    """ Display a batch of images inline. """
+    scaled = ((batch + 1)*127.5).round().clamp(0,255).to(th.uint8).cpu()
+    reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
+    Image.fromarray(reshaped.numpy())
+
+
+# Create a classifier-free guidance sampling function
+guidance_scale = 3.0
+
+def model_fn(x_t, ts, **kwargs):
+    half = x_t[: len(x_t) // 2]
+    combined = th.cat([half, half], dim=0)
+    model_out = model(combined, ts, **kwargs)
+    eps, rest = model_out[:, :3], model_out[:, 3:]
+    cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)
+    half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
+    eps = th.cat([half_eps, half_eps], dim=0)
+    return th.cat([eps, rest], dim=1)
+
+
+@app.get("/")
+def read_root():
+    return {"glide!"}
+
+@app.get("/{generate}")
+def sample(prompt):
+    # Sampling parameters
+    batch_size = 1
+
+    # Tune this parameter to control the sharpness of 256x256 images.
+    # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
+    upsample_temp = 0.997
+
+    ##############################
+    # Sample from the base model #
+    ##############################
+
+    # Create the text tokens to feed to the model.
+    tokens = model.tokenizer.encode(prompt)
+    tokens, mask = model.tokenizer.padded_tokens_and_mask(
+        tokens, options['text_ctx']
+    )
+
+    # Create the classifier-free guidance tokens (empty)
+    full_batch_size = batch_size * 2
+    uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(
+        [], options['text_ctx']
+    )
+
+    # Pack the tokens together into model kwargs.
+    model_kwargs = dict(
+        tokens=th.tensor(
+            [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
+        ),
+        mask=th.tensor(
+            [mask] * batch_size + [uncond_mask] * batch_size,
+            dtype=th.bool,
+            device=device,
+        ),
+    )
+
+    # Sample from the base model.
+    model.del_cache()
+    samples = diffusion.p_sample_loop(
+        model_fn,
+        (full_batch_size, 3, options["image_size"], options["image_size"]),
+        device=device,
+        clip_denoised=True,
+        progress=True,
+        model_kwargs=model_kwargs,
+        cond_fn=None,
+    )[:batch_size]
+    model.del_cache()
+
+
+    ##############################
+    # Upsample the 64x64 samples #
+    ##############################
+
+    tokens = model_up.tokenizer.encode(prompt)
+    tokens, mask = model_up.tokenizer.padded_tokens_and_mask(
+        tokens, options_up['text_ctx']
+    )
+
+    # Create the model conditioning dict.
+    model_kwargs = dict(
+        # Low-res image to upsample.
+        low_res=((samples+1)*127.5).round()/127.5 - 1,
+
+        # Text tokens
+        tokens=th.tensor(
+            [tokens] * batch_size, device=device
+        ),
+        mask=th.tensor(
+            [mask] * batch_size,
+            dtype=th.bool,
+            device=device,
+        ),
+    )
+
+    # Sample from the base model.
+    model_up.del_cache()
+    up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
+    up_samples = diffusion_up.ddim_sample_loop(
+        model_up,
+        up_shape,
+        noise=th.randn(up_shape, device=device) * upsample_temp,
+        device=device,
+        clip_denoised=True,
+        progress=True,
+        model_kwargs=model_kwargs,
+        cond_fn=None,
+    )[:batch_size]
+    model_up.del_cache()
+
+    # Show the output
+    image = get_images(up_samples)
+    image = to_base64(image)
+    return {"image": image}
+
+
+def to_base64(pil_image):
+    buffered = BytesIO()
+    pil_image.save(buffered, format="JPEG")
+    return base64.b64encode(buffered.getvalue())
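
Two details in the new server.py deserve a close read. First, model_fn implements classifier-free guidance: the batch carries a conditional and an unconditional half, and the guided noise prediction is eps = eps_uncond + guidance_scale * (eps_cond - eps_uncond), here with guidance_scale = 3.0. Second, get_images builds the PIL image but never returns it, so sample() ends up passing None to to_base64. A minimal corrected sketch keeping the commit's names (the .decode call is an addition, so the JSON payload is a plain string rather than raw bytes):

    def get_images(batch: th.Tensor) -> Image.Image:
        # Map [-1, 1] floats to uint8 and tile the batch horizontally.
        scaled = ((batch + 1) * 127.5).round().clamp(0, 255).to(th.uint8).cpu()
        reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
        return Image.fromarray(reshaped.numpy())  # the commit omits this return

    def to_base64(pil_image: Image.Image) -> str:
        buffered = BytesIO()
        pil_image.save(buffered, format="JPEG")
        # b64encode returns bytes; decode so the response serializes as a string.
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

Note also that with the route declared as @app.get("/{generate}") and no matching function argument, the path segment itself is ignored and prompt is read as a required query parameter, so after starting the app (e.g. with uvicorn server:app) a request like GET /sample?prompt=an%20oil%20painting%20of%20a%20corgi would trigger generation.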
setup.py CHANGED
@@ -2,19 +2,7 @@ from setuptools import setup
 
 setup(
     name="glide-text2im",
-    packages=[
-        "glide_text2im",
-        "glide_text2im.clip",
-        "glide_text2im.tokenizer",
-    ],
-    package_data={
-        "glide_text2im.tokenizer": [
-            "bpe_simple_vocab_16e6.txt.gz",
-            "encoder.json.gz",
-            "vocab.bpe.gz",
-        ],
-        "glide_text2im.clip": ["config.yaml"],
-    },
+    packages=["glide_text2im"],
     install_requires=[
         "Pillow",
         "attrs",
@@ -22,8 +10,6 @@ setup(
         "filelock",
         "requests",
         "tqdm",
-        "ftfy",
-        "regex",
     ],
     author="OpenAI",
 )
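
The slimmed-down setup.py drops the clip and tokenizer subpackages along with their package_data. That is harmless for this Space, since app.py runs pip install -e . and an editable install reads those files straight from the source tree; a regular wheel built from this file, however, would no longer ship the tokenizer assets. A hedged sketch of what would restore them if a non-editable install were ever needed (this mirrors the removed lines, it is not part of the commit):

    from setuptools import setup

    # Hypothetical variant re-adding the data files this commit removed.
    setup(
        name="glide-text2im",
        packages=[
            "glide_text2im",
            "glide_text2im.clip",
            "glide_text2im.tokenizer",
        ],
        package_data={
            "glide_text2im.tokenizer": [
                "bpe_simple_vocab_16e6.txt.gz",
                "encoder.json.gz",
                "vocab.bpe.gz",
            ],
            "glide_text2im.clip": ["config.yaml"],
        },
        install_requires=[
            "Pillow",
            "attrs",
            # ... remaining requirements as in the commit ...
            "filelock",
            "requests",
            "tqdm",
        ],
        author="OpenAI",
    )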