patrickvonplaten committed on
Commit
811d1c6
·
1 Parent(s): 7956ca3

fix more tests

Browse files
1 ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from diffusers import UNet2DConditionModel
3
+ import torch
4
+
5
# Benchmark script: runs one forward/backward pass of the SDXL UNet with
# gradient checkpointing enabled and prints the peak CUDA memory allocated.
device = "cuda:1"

unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", variant="fp16", torch_dtype=torch.float16)
unet.train()
unet.enable_gradient_checkpointing()
unet = unet.to(device)

batch_size = 8

# Dummy fp16 inputs: one random example repeated across the batch dimension.
sample = torch.randn((1, 4, 128, 128)).half().to(unet.device).repeat(batch_size, 1, 1, 1)
time_ids = (torch.arange(6) / 6)[None, :].half().to(unet.device).repeat(batch_size, 1)
encoder_hidden_states = torch.randn((1, 77, 2048)).half().to(unet.device).repeat(batch_size, 1, 1)
text_embeds = torch.randn((1, 1280)).half().to(unet.device).repeat(batch_size, 1)

out = unet(sample, 1.0, added_cond_kwargs={"time_ids": time_ids, "text_embeds": text_embeds}, encoder_hidden_states=encoder_hidden_states).sample

# Arbitrary scalar loss; it only exists so that backward() can be run.
loss = ((out - sample) ** 2).mean()
loss.backward()

print(torch.cuda.max_memory_allocated(device=unet.device))


# no gradient checkpointing: 12,276,695,552
# curr gradient checkpointing: 10,862,276,096
all_branches.txt CHANGED
@@ -1,42 +1,2 @@
1
- CompVis/stable-diffusion-v1-3
2
- CompVis/stable-diffusion-v1-1
3
- CompVis/stable-diffusion-v1-2
4
  CompVis/stable-diffusion-v1-4
5
- hakurei/waifu-diffusion
6
- rinna/japanese-stable-diffusion
7
- CompVis/stable-diffusion-v1-5
8
- runwayml/stable-diffusion-inpainting
9
- fusing/sd-inpaint-temp
10
  runwayml/stable-diffusion-v1-5
11
- ckpt/sd15
12
- aarondotwork/sd-pokemon-diffusers
13
- technillogue/waifu-diffusion
14
- DGSpitzer/Cyberpunk-Anime-Diffusion
15
- microsoft/vq-diffusion-ithq
16
- fusing/rdm
17
- CompVis/ldm-super-resolution-4x-openimages
18
- BAAI/AltDiffusion
19
- fusing/test
20
- stabilityai/stable-diffusion-2
21
- stabilityai/stable-diffusion-2-base
22
- stabilityai/stable-diffusion-2-depth
23
- stabilityai/stable-diffusion-2-inpainting
24
- stabilityai/stable-diffusion-x4-upscaler
25
- jplumail/matthieu-v1-pipe
26
- stabilityai/stable-diffusion-2-1
27
- stabilityai/stable-diffusion-2-1-base
28
- jplumail/matthieu-v2-pipe
29
- timbrooks/instruct-pix2pix
30
- ruiruin/counmargemodel
31
- Nacholmo/AbyssOrangeMix2-hard-vae-swapped
32
- Nacholmo/Counterfeit-V2.5-vae-swapped
33
- Nacholmo/VOXO-v0-vtuber-diffusers
34
- p1atdev/pvc-v3
35
- Nacholmo/meinamixv7-diffusers
36
- gligen/diffusers-generation-text-box
37
- gligen/diffusers-inpainting-text-box
38
- zhg/deliberate
39
- philz1337/realism
40
- viktfb/patterngenai
41
- viktfb/patterngen-v1
42
- viktfb/style2.0
 
 
 
 
1
  CompVis/stable-diffusion-v1-4
 
 
 
 
 
2
  runwayml/stable-diffusion-v1-5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
check_for_branches.py CHANGED
@@ -26,7 +26,7 @@ if __name__ == "__main__":
26
  api = HfApi()
27
  branches = main(api, model_id)
28
 
29
- if "fp16" in branches:
30
  print(model_id)
31
  #
32
  # if len(branches) > 0:
 
26
  api = HfApi()
27
  branches = main(api, model_id)
28
 
29
+ if "non-ema" in branches:
30
  print(model_id)
31
  #
32
  # if len(branches) > 0:
collect_env.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Unlike the rest of the PyTorch this file must be python2 compliant.
3
+ # This script outputs relevant system environment info
4
+ # Run it with `python collect_env.py`.
5
+ import datetime
6
+ import locale
7
+ import re
8
+ import subprocess
9
+ import sys
10
+ import os
11
+ from collections import namedtuple
12
+
13
+
14
+ try:
15
+ import torch
16
+ TORCH_AVAILABLE = True
17
+ except (ImportError, NameError, AttributeError, OSError):
18
+ TORCH_AVAILABLE = False
19
+
20
+ # System Environment Information
21
+ SystemEnv = namedtuple('SystemEnv', [
22
+ 'torch_version',
23
+ 'is_debug_build',
24
+ 'cuda_compiled_version',
25
+ 'gcc_version',
26
+ 'clang_version',
27
+ 'cmake_version',
28
+ 'os',
29
+ 'libc_version',
30
+ 'python_version',
31
+ 'python_platform',
32
+ 'is_cuda_available',
33
+ 'cuda_runtime_version',
34
+ 'cuda_module_loading',
35
+ 'nvidia_driver_version',
36
+ 'nvidia_gpu_models',
37
+ 'cudnn_version',
38
+ 'pip_version', # 'pip' or 'pip3'
39
+ 'pip_packages',
40
+ 'conda_packages',
41
+ 'hip_compiled_version',
42
+ 'hip_runtime_version',
43
+ 'miopen_runtime_version',
44
+ 'caching_allocator_config',
45
+ 'is_xnnpack_available',
46
+ 'cpu_info',
47
+ ])
48
+
49
+
50
def run(command):
    """Run ``command`` in a subprocess and return ``(return-code, stdout, stderr)``.

    A string command is executed through the shell; any other value (for
    example an argv list) is handed to ``subprocess.Popen`` unchanged.
    Both streams are decoded and stripped of surrounding whitespace.
    """
    # Only plain strings need shell parsing.
    shell = isinstance(command, str)
    p = subprocess.Popen(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, shell=shell)
    raw_output, raw_err = p.communicate()
    rc = p.returncode
    # Windows gets the 'oem' codec; everywhere else the locale's preferred one.
    if get_platform() == 'win32':
        enc = 'oem'
    else:
        enc = locale.getpreferredencoding()
    output = raw_output.decode(enc)
    err = raw_err.decode(enc)
    return rc, output.strip(), err.strip()
64
+
65
+
66
def run_and_read_all(run_lambda, command):
    """Run ``command`` via ``run_lambda`` and return its whole stdout.

    ``run_lambda`` must return a ``(return-code, stdout, stderr)`` triple.
    Returns ``None`` when the return code is non-zero.
    """
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    return out
72
+
73
+
74
def run_and_parse_first_match(run_lambda, command, regex):
    """Run ``command`` via ``run_lambda`` and return group 1 of the first ``regex`` match.

    ``regex`` must therefore contain at least one capture group.
    Returns ``None`` when the return code is non-zero or nothing matches.
    """
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    match = re.search(regex, out)
    if match is None:
        return None
    return match.group(1)
83
+
84
def run_and_return_first_line(run_lambda, command):
    """Run ``command`` via ``run_lambda`` and return the first line of its stdout.

    Returns ``None`` when the return code is non-zero. Empty output yields
    an empty string, not ``None``.
    """
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    return out.split('\n')[0]
90
+
91
+
92
def get_conda_packages(run_lambda):
    """Return the lines of ``conda list`` that mention a relevant package.

    The conda executable is taken from ``$CONDA_EXE``, falling back to
    ``conda``. Comment lines (starting with ``#``) are dropped. Returns
    ``None`` when the command fails.
    """
    relevant_names = (
        "torch",
        "numpy",
        "cudatoolkit",
        "soumith",
        "mkl",
        "magma",
        "triton",
    )
    conda = os.environ.get('CONDA_EXE', 'conda')
    out = run_and_read_all(run_lambda, "{} list".format(conda))
    if out is None:
        return out

    return "\n".join(
        line
        for line in out.splitlines()
        if not line.startswith("#")
        and any(name in line for name in relevant_names)
    )
115
+
116
+ def get_gcc_version(run_lambda):
117
+ return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
118
+
119
+ def get_clang_version(run_lambda):
120
+ return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)')
121
+
122
+
123
+ def get_cmake_version(run_lambda):
124
+ return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)')
125
+
126
+
127
+ def get_nvidia_driver_version(run_lambda):
128
+ if get_platform() == 'darwin':
129
+ cmd = 'kextstat | grep -i cuda'
130
+ return run_and_parse_first_match(run_lambda, cmd,
131
+ r'com[.]nvidia[.]CUDA [(](.*?)[)]')
132
+ smi = get_nvidia_smi()
133
+ return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ')
134
+
135
+
136
+ def get_gpu_info(run_lambda):
137
+ if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None):
138
+ if TORCH_AVAILABLE and torch.cuda.is_available():
139
+ return torch.cuda.get_device_name(None)
140
+ return None
141
+ smi = get_nvidia_smi()
142
+ uuid_regex = re.compile(r' \(UUID: .+?\)')
143
+ rc, out, _ = run_lambda(smi + ' -L')
144
+ if rc != 0:
145
+ return None
146
+ # Anonymize GPUs by removing their UUID
147
+ return re.sub(uuid_regex, '', out)
148
+
149
+
150
+ def get_running_cuda_version(run_lambda):
151
+ return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)')
152
+
153
+
154
def get_cudnn_version(run_lambda):
    """Return the cuDNN library path(s) found on this machine, or ``None``.

    This returns a list of libcudnn files; it's hard to tell which one is
    actually being used. A single hit is returned as a bare path, several
    hits as a sorted, newline-separated list with a short preamble. If the
    lookup command produces nothing usable, ``$CUDNN_LIBRARY`` is tried.
    """
    if get_platform() == 'win32':
        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
        cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
        where_cmd = os.path.join(system_root, 'System32', 'where')
        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
    elif get_platform() == 'darwin':
        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
        cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
    else:
        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
    rc, out, _ = run_lambda(cudnn_cmd)
    # A return code of 1 is tolerated (not found / permission errors); empty
    # output or any other failure falls back to the environment variable.
    if len(out) == 0 or rc not in (0, 1):
        cudnn_library = os.environ.get('CUDNN_LIBRARY')
        if cudnn_library is not None and os.path.isfile(cudnn_library):
            return os.path.realpath(cudnn_library)
        return None
    files_set = set()
    for fn in out.split('\n'):
        fn = os.path.realpath(fn)  # eliminate symbolic links
        if os.path.isfile(fn):
            files_set.add(fn)
    if not files_set:
        return None
    # Alphabetize the result because the order is non-deterministic otherwise
    files = sorted(files_set)
    if len(files) == 1:
        return files[0]
    result = '\n'.join(files)
    return 'Probably one of the following:\n{}'.format(result)
189
+
190
+
191
+ def get_nvidia_smi():
192
+ # Note: nvidia-smi is currently available only on Windows and Linux
193
+ smi = 'nvidia-smi'
194
+ if get_platform() == 'win32':
195
+ system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
196
+ program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
197
+ legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi)
198
+ new_path = os.path.join(system_root, 'System32', smi)
199
+ smis = [new_path, legacy_path]
200
+ for candidate_smi in smis:
201
+ if os.path.exists(candidate_smi):
202
+ smi = '"{}"'.format(candidate_smi)
203
+ break
204
+ return smi
205
+
206
+
207
+ # example outputs of CPU infos
208
+ # * linux
209
+ # Architecture: x86_64
210
+ # CPU op-mode(s): 32-bit, 64-bit
211
+ # Address sizes: 46 bits physical, 48 bits virtual
212
+ # Byte Order: Little Endian
213
+ # CPU(s): 128
214
+ # On-line CPU(s) list: 0-127
215
+ # Vendor ID: GenuineIntel
216
+ # Model name: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
217
+ # CPU family: 6
218
+ # Model: 106
219
+ # Thread(s) per core: 2
220
+ # Core(s) per socket: 32
221
+ # Socket(s): 2
222
+ # Stepping: 6
223
+ # BogoMIPS: 5799.78
224
+ # Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
225
+ # sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
226
+ # xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
227
+ # pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
228
+ # hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
229
+ # fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
230
+ # avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
231
+ # xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
232
+ # avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
233
+ # Virtualization features:
234
+ # Hypervisor vendor: KVM
235
+ # Virtualization type: full
236
+ # Caches (sum of all):
237
+ # L1d: 3 MiB (64 instances)
238
+ # L1i: 2 MiB (64 instances)
239
+ # L2: 80 MiB (64 instances)
240
+ # L3: 108 MiB (2 instances)
241
+ # NUMA:
242
+ # NUMA node(s): 2
243
+ # NUMA node0 CPU(s): 0-31,64-95
244
+ # NUMA node1 CPU(s): 32-63,96-127
245
+ # Vulnerabilities:
246
+ # Itlb multihit: Not affected
247
+ # L1tf: Not affected
248
+ # Mds: Not affected
249
+ # Meltdown: Not affected
250
+ # Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
251
+ # Retbleed: Not affected
252
+ # Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp
253
+ # Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization
254
+ # Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
255
+ # Srbds: Not affected
256
+ # Tsx async abort: Not affected
257
+ # * win32
258
+ # Architecture=9
259
+ # CurrentClockSpeed=2900
260
+ # DeviceID=CPU0
261
+ # Family=179
262
+ # L2CacheSize=40960
263
+ # L2CacheSpeed=
264
+ # Manufacturer=GenuineIntel
265
+ # MaxClockSpeed=2900
266
+ # Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
267
+ # ProcessorType=3
268
+ # Revision=27142
269
+ #
270
+ # Architecture=9
271
+ # CurrentClockSpeed=2900
272
+ # DeviceID=CPU1
273
+ # Family=179
274
+ # L2CacheSize=40960
275
+ # L2CacheSpeed=
276
+ # Manufacturer=GenuineIntel
277
+ # MaxClockSpeed=2900
278
+ # Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
279
+ # ProcessorType=3
280
+ # Revision=27142
281
+
282
def get_cpu_info(run_lambda):
    """Return a textual description of the CPU(s) for the current platform.

    Uses ``lscpu`` on Linux, ``wmic cpu get ...`` on Windows and ``sysctl``
    on macOS. Returns the command's stdout on success and its stderr on
    failure; on any other platform an empty string is returned.
    """
    rc, out, err = 0, '', ''
    if get_platform() == 'linux':
        rc, out, err = run_lambda('lscpu')
    elif get_platform() == 'win32':
        # Adjacent literals are joined so that no whitespace from a
        # backslash line continuation can end up inside the property list.
        rc, out, err = run_lambda(
            'wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID,'
            'CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE')
    elif get_platform() == 'darwin':
        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
    return out if rc == 0 else err
297
+
298
+
299
def get_platform():
    """Return a normalized platform name.

    ``sys.platform`` is collapsed to ``'linux'``, ``'win32'``, ``'cygwin'``
    or ``'darwin'`` when it starts with one of those prefixes; any other
    value is returned unchanged.
    """
    for name in ('linux', 'win32', 'cygwin', 'darwin'):
        if sys.platform.startswith(name):
            return name
    return sys.platform
310
+
311
+
312
+ def get_mac_version(run_lambda):
313
+ return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)')
314
+
315
+
316
+ def get_windows_version(run_lambda):
317
+ system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
318
+ wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic')
319
+ findstr_cmd = os.path.join(system_root, 'System32', 'findstr')
320
+ return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd))
321
+
322
+
323
+ def get_lsb_version(run_lambda):
324
+ return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)')
325
+
326
+
327
+ def check_release_file(run_lambda):
328
+ return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
329
+ r'PRETTY_NAME="(.*)"')
330
+
331
+
332
def get_os(run_lambda):
    """Return a human-readable operating-system description.

    Windows/Cygwin: the output of ``get_windows_version``.
    macOS: ``'macOS <version> (<machine>)'``, or ``None`` if the version
    cannot be determined.
    Linux: the ``lsb_release`` description, else ``PRETTY_NAME`` from
    ``/etc/*-release``, else the bare platform name, each followed by the
    machine type.
    Anything else: the platform name as reported by ``get_platform``.
    """
    from platform import machine
    platform = get_platform()

    if platform == 'win32' or platform == 'cygwin':
        return get_windows_version(run_lambda)

    if platform == 'darwin':
        version = get_mac_version(run_lambda)
        if version is None:
            return None
        return 'macOS {} ({})'.format(version, machine())

    if platform == 'linux':
        # Ubuntu/Debian based
        desc = get_lsb_version(run_lambda)
        if desc is not None:
            return '{} ({})'.format(desc, machine())

        # Try reading /etc/*-release
        desc = check_release_file(run_lambda)
        if desc is not None:
            return '{} ({})'.format(desc, machine())

        return '{} ({})'.format(platform, machine())

    # Unknown platform
    return platform
360
+
361
+
362
+ def get_python_platform():
363
+ import platform
364
+ return platform.platform()
365
+
366
+
367
+ def get_libc_version():
368
+ import platform
369
+ if get_platform() != 'linux':
370
+ return 'N/A'
371
+ return '-'.join(platform.libc_ver())
372
+
373
+
374
def get_pip_packages(run_lambda):
    """Return ``(pip_version, packages)`` for the running interpreter.

    ``packages`` holds the ``pip list --format=freeze`` lines that mention a
    relevant package (this also finds conda-installed pytorch and numpy).
    It is ``None`` when pip could not be run, which lets the report show
    'Could not collect' instead of crashing.

    ``pip_version`` is the label ``'pip3'`` or ``'pip'`` depending on the
    Python major version.
    """
    relevant_names = ("torch", "numpy", "mypy", "flake8", "triton")
    pip_version = 'pip3' if sys.version[0] == '3' else 'pip'

    # People generally have `pip` as `pip` or `pip3`,
    # but here it is invoked as `python -mpip`.
    rc, out, _ = run_lambda([sys.executable, '-mpip', "list", "--format=freeze"])
    if rc != 0:
        # pip missing or failing: there is no output to filter.
        return pip_version, None

    packages = "\n".join(
        line
        for line in out.splitlines()
        if any(name in line for name in relevant_names)
    )
    return pip_version, packages
400
+
401
+
402
+ def get_cachingallocator_config():
403
+ ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
404
+ return ca_config
405
+
406
+
407
+ def get_cuda_module_loading_config():
408
+ if TORCH_AVAILABLE and torch.cuda.is_available():
409
+ torch.cuda.init()
410
+ config = os.environ.get('CUDA_MODULE_LOADING', '')
411
+ return config
412
+ else:
413
+ return "N/A"
414
+
415
+
416
+ def is_xnnpack_available():
417
+ if TORCH_AVAILABLE:
418
+ import torch.backends.xnnpack
419
+ return str(torch.backends.xnnpack.enabled) # type: ignore[attr-defined]
420
+ else:
421
+ return "N/A"
422
+
423
+ def get_env_info():
424
+ run_lambda = run
425
+ pip_version, pip_list_output = get_pip_packages(run_lambda)
426
+
427
+ if TORCH_AVAILABLE:
428
+ version_str = torch.__version__
429
+ debug_mode_str = str(torch.version.debug)
430
+ cuda_available_str = str(torch.cuda.is_available())
431
+ cuda_version_str = torch.version.cuda
432
+ if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version
433
+ hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
434
+ else: # HIP version
435
+ def get_version_or_na(cfg, prefix):
436
+ _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s]
437
+ return _lst[0] if _lst else 'N/A'
438
+
439
+ cfg = torch._C._show_config().split('\n')
440
+ hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')
441
+ miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')
442
+ cuda_version_str = 'N/A'
443
+ hip_compiled_version = torch.version.hip
444
+ else:
445
+ version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'
446
+ hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
447
+
448
+ sys_version = sys.version.replace("\n", " ")
449
+
450
+ return SystemEnv(
451
+ torch_version=version_str,
452
+ is_debug_build=debug_mode_str,
453
+ python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1),
454
+ python_platform=get_python_platform(),
455
+ is_cuda_available=cuda_available_str,
456
+ cuda_compiled_version=cuda_version_str,
457
+ cuda_runtime_version=get_running_cuda_version(run_lambda),
458
+ cuda_module_loading=get_cuda_module_loading_config(),
459
+ nvidia_gpu_models=get_gpu_info(run_lambda),
460
+ nvidia_driver_version=get_nvidia_driver_version(run_lambda),
461
+ cudnn_version=get_cudnn_version(run_lambda),
462
+ hip_compiled_version=hip_compiled_version,
463
+ hip_runtime_version=hip_runtime_version,
464
+ miopen_runtime_version=miopen_runtime_version,
465
+ pip_version=pip_version,
466
+ pip_packages=pip_list_output,
467
+ conda_packages=get_conda_packages(run_lambda),
468
+ os=get_os(run_lambda),
469
+ libc_version=get_libc_version(),
470
+ gcc_version=get_gcc_version(run_lambda),
471
+ clang_version=get_clang_version(run_lambda),
472
+ cmake_version=get_cmake_version(run_lambda),
473
+ caching_allocator_config=get_cachingallocator_config(),
474
+ is_xnnpack_available=is_xnnpack_available(),
475
+ cpu_info=get_cpu_info(run_lambda),
476
+ )
477
+
478
+ env_info_fmt = """
479
+ PyTorch version: {torch_version}
480
+ Is debug build: {is_debug_build}
481
+ CUDA used to build PyTorch: {cuda_compiled_version}
482
+ ROCM used to build PyTorch: {hip_compiled_version}
483
+
484
+ OS: {os}
485
+ GCC version: {gcc_version}
486
+ Clang version: {clang_version}
487
+ CMake version: {cmake_version}
488
+ Libc version: {libc_version}
489
+
490
+ Python version: {python_version}
491
+ Python platform: {python_platform}
492
+ Is CUDA available: {is_cuda_available}
493
+ CUDA runtime version: {cuda_runtime_version}
494
+ CUDA_MODULE_LOADING set to: {cuda_module_loading}
495
+ GPU models and configuration: {nvidia_gpu_models}
496
+ Nvidia driver version: {nvidia_driver_version}
497
+ cuDNN version: {cudnn_version}
498
+ HIP runtime version: {hip_runtime_version}
499
+ MIOpen runtime version: {miopen_runtime_version}
500
+ Is XNNPACK available: {is_xnnpack_available}
501
+
502
+ CPU:
503
+ {cpu_info}
504
+
505
+ Versions of relevant libraries:
506
+ {pip_packages}
507
+ {conda_packages}
508
+ """.strip()
509
+
510
+
511
def pretty_str(envinfo):
    """Render a ``SystemEnv`` tuple as the multi-line report text.

    Missing values (``None``) become 'Could not collect', booleans become
    'Yes'/'No', empty package lists become 'No relevant packages', and the
    pip/conda package lines are tagged with a ``[pip]``-style prefix before
    everything is substituted into ``env_info_fmt``.
    """
    def replace_nones(dct, replacement='Could not collect'):
        for key in dct:
            if dct[key] is not None:
                continue
            dct[key] = replacement
        return dct

    def replace_bools(dct, true='Yes', false='No'):
        for key in dct:
            if dct[key] is True:
                dct[key] = true
            elif dct[key] is False:
                dct[key] = false
        return dct

    def prepend(text, tag='[prepend]'):
        lines = text.split('\n')
        updated_lines = [tag + line for line in lines]
        return '\n'.join(updated_lines)

    def replace_if_empty(text, replacement='No relevant packages'):
        if text is not None and len(text) == 0:
            return replacement
        return text

    def maybe_start_on_next_line(string):
        # If `string` is multiline, prepend a \n to it.
        if string is not None and len(string.split('\n')) > 1:
            return '\n{}\n'.format(string)
        return string

    mutable_dict = envinfo._asdict()

    # If nvidia_gpu_models is multiline, start on the next line
    mutable_dict['nvidia_gpu_models'] = \
        maybe_start_on_next_line(envinfo.nvidia_gpu_models)

    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
    dynamic_cuda_fields = [
        'cuda_runtime_version',
        'nvidia_gpu_models',
        'nvidia_driver_version',
    ]
    all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
    all_dynamic_cuda_fields_missing = all(
        mutable_dict[field] is None for field in dynamic_cuda_fields)
    if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
        for field in all_cuda_fields:
            mutable_dict[field] = 'No CUDA'
        if envinfo.cuda_compiled_version is None:
            mutable_dict['cuda_compiled_version'] = 'None'

    # Replace True with Yes, False with No
    mutable_dict = replace_bools(mutable_dict)

    # Replace all None objects with 'Could not collect'
    mutable_dict = replace_nones(mutable_dict)

    # If either of these are '', replace with 'No relevant packages'
    mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages'])
    mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages'])

    # Tag conda and pip packages with a prefix
    # If they were previously None, they'll show up as ie '[conda] Could not collect'
    if mutable_dict['pip_packages']:
        mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'],
                                               '[{}] '.format(envinfo.pip_version))
    if mutable_dict['conda_packages']:
        mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'],
                                                 '[conda] ')
    # The CPU description is emitted exactly as collected.
    mutable_dict['cpu_info'] = envinfo.cpu_info
    return env_info_fmt.format(**mutable_dict)
584
+
585
+
586
+ def get_pretty_env_info():
587
+ return pretty_str(get_env_info())
588
+
589
+
590
def main():
    """Print the environment report, then point at the newest minidump if any.

    The minidump hint is only attempted on Linux when torch exposes
    ``torch.utils._crash_handler`` and its default minidump directory
    exists; the hint is written to stderr.
    """
    print("Collecting environment information...")
    output = get_pretty_env_info()
    print(output)

    if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'):
        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
        if sys.platform == "linux" and os.path.exists(minidump_dir):
            dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
            # An existing but empty directory has nothing to report;
            # max() on an empty list would raise ValueError.
            if dumps:
                latest = max(dumps, key=os.path.getctime)
                ctime = os.path.getctime(latest)
                creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')
                msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
                      "if this is related to your bug please include it when you file a report ***"
                print(msg, file=sys.stderr)
605
+
606
+
607
+
608
+ if __name__ == '__main__':
609
+ main()
init_image.png ADDED
mask_image.png ADDED
model_ids.txt CHANGED
The diff for this file is too large to render. See raw diff
 
new_scheduler.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
3
+ import torch
4
+
5
# Runs Stable Diffusion v1.5 three times with the Euler-ancestral scheduler,
# optionally with a torch.compile'd UNet.
path = "runwayml/stable-diffusion-v1-5"

run_compile = False  # Set True / False
use_karras_sigmas = False

pipe = DiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=use_karras_sigmas)
pipe.unet.to(memory_format=torch.channels_last)

if run_compile:
    print("Run torch compile")
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

prompt = "ghibli style, a fantasy landscape with castles"

for _ in range(3):
    images = pipe(prompt=prompt).images
prompt_weight.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import torch
3
+ import os
4
+ from compel import Compel, ReturnedEmbeddingsType
5
+ from diffusers import DiffusionPipeline
6
+ from huggingface_hub import HfApi
7
+ from pathlib import Path
8
+
9
api = HfApi()

pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", variant="fp16", use_safetensors=True, torch_dtype=torch.float16).to("cuda")

compel = Compel(tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2], text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2], returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, requires_pooled=[False, True])

# Same prompt twice, with "ball" weighted 1.5 and 0.6 respectively
prompt = ["a red cat playing with a (ball)1.5", "a red cat playing with a (ball)0.6"]
conditioning, pooled = compel(prompt)


# generate image (one identically seeded generator per prompt)
generator = [torch.Generator().manual_seed(33) for _ in range(len(prompt))]
images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, generator=generator, num_inference_steps=30).images

# Make sure the output directory exists; image.save() does not create it.
output_dir = os.path.join(Path.home(), "images")
os.makedirs(output_dir, exist_ok=True)

for i, image in enumerate(images):
    file_name = f"bb_1_{i}"
    path = os.path.join(output_dir, f"{file_name}.png")
    image.save(path)

    api.upload_file(
        path_or_fileobj=path,
        # basename() is separator-agnostic, unlike splitting on "/".
        path_in_repo=os.path.basename(path),
        repo_id="patrickvonplaten/images",
        repo_type="dataset",
    )
    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
run_bug_conv.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+
7
class SuperConv(nn.Conv2d):
    """``nn.Conv2d`` subclass with an optional constant offset on its output.

    All positional and keyword arguments are forwarded to ``nn.Conv2d``.
    When ``is_lora`` is true, ``forward`` returns the regular convolution
    output plus 3; otherwise it behaves exactly like ``nn.Conv2d``.
    """

    def __init__(self, *args, is_lora=False, **kwargs):
        super().__init__(*args, **kwargs)

        self.is_lora = is_lora

    def forward(self, *args, **kwargs):
        if self.is_lora:
            return 3 + super().forward(*args, **kwargs)
        else:
            return super().forward(*args, **kwargs)
19
+
20
+ # Define a simple Convolutional Neural Network
21
class SimpleCNN(nn.Module):
    """Small conv/pool/fully-connected network built on ``SuperConv`` layers.

    The first linear layer expects ``16 * 5 * 5`` features per example,
    which is what two 5x5 convolutions with 2x2 pooling leave of a 32x32
    input (the size used by this script).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = SuperConv(3, 6, 5)  # Assuming input images are RGB, so 3 input channels
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = SuperConv(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the feature maps to one vector per example.
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
39
+
40
+ # Create the network
41
net = SimpleCNN()

# Initialize weights with dummy values (same constant for conv and linear layers)
for m in net.modules():
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.constant_(m.weight, 0.1)
        nn.init.constant_(m.bias, 0.1)

# Perform inference in eager mode
# (named `inputs` so the builtin `input` is not shadowed)
inputs = torch.randn(1, 3, 32, 32).to("cuda")
net = net.to("cuda")
output = net(inputs)

print(output)

# Run the same input through the compiled module for comparison
net = torch.compile(net, mode="reduce-overhead", fullgraph=True)

output = net(inputs)

print(output)
run_local_fuse_xl.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from huggingface_hub import HfApi
3
+ import torch
4
+ from pathlib import Path
5
+ import os
6
+ import time
7
+
8
api = HfApi()
start_time = time.time()

from diffusers import DiffusionPipeline
import torch

# Load SDXL base, apply the offset-noise example LoRA and fuse it into the UNet.
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.load_lora_weights("stabilityai/stable-diffusion-xl-base-1.0", weight_name="sd_xl_offset_example-lora_1.0.safetensors")
pipe.unet.fuse_lora()

pipe.to(torch_dtype=torch.float16)
pipe.to("cuda")

torch.manual_seed(0)

prompt = "beautiful scenery nature glass bottle landscape, , purple galaxy bottle"
negative_prompt = "text, watermark"

image = pipe(prompt, negative_prompt=negative_prompt, num_inference_steps=25).images[0]

file_name = "aaa"
# Make sure the output directory exists; image.save() does not create it.
output_dir = os.path.join(Path.home(), "images", "ediffi_sdxl")
os.makedirs(output_dir, exist_ok=True)
path = os.path.join(output_dir, f"{file_name}.png")
image.save(path)

api.upload_file(
    path_or_fileobj=path,
    # basename() is separator-agnostic, unlike splitting on "/".
    path_in_repo=os.path.basename(path),
    repo_id="patrickvonplaten/images",
    repo_type="dataset",
)
print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
run_local_xl.py CHANGED
@@ -19,15 +19,14 @@ start_time = time.time()
19
  # use_refiner = bool(int(sys.argv[1]))
20
  use_refiner = True
21
  use_diffusers = True
22
- path = "/home/patrick/sai/stable-diffusion-xl-base-1.0"
23
- refiner_path = "/home/patrick/sai/stable-diffusion-xl-refiner-1.0"
24
- vae_path = "/home/patrick/sai/stable-diffusion-xl-base-1.0/vae/"
25
- vae_path = "/home/patrick/sai/sdxl-vae"
26
 
27
  vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
28
  if use_diffusers:
29
  # pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
30
- pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True)
31
  print(time.time() - start_time)
32
  pipe.to("cuda")
33
 
 
19
  # use_refiner = bool(int(sys.argv[1]))
20
  use_refiner = True
21
  use_diffusers = True
22
+ path = "stabilityai/stable-diffusion-xl-base-1.0"
23
+ refiner_path = "stabilityai/stable-diffusion-xl-refiner-1.0"
24
+ vae_path = "stabilityai/sdxl-vae"
 
25
 
26
  vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
27
  if use_diffusers:
28
  # pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
29
+ pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True, add_watermarker=False)
30
  print(time.time() - start_time)
31
  pipe.to("cuda")
32
 
run_lora.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Render images with Stable Diffusion v1.5 plus a test LoRA and upload them.

Loads the base pipeline in float16, applies the LoRA weights, generates
images for a fixed prompt with a LoRA scale of 0.5, then saves every image
under ``~/images`` and uploads it to the ``patrickvonplaten/images`` dataset
repository.
"""
from diffusers import StableDiffusionPipeline, KDPM2DiscreteScheduler, StableDiffusionImg2ImgPipeline, HeunDiscreteScheduler, KDPM2AncestralDiscreteScheduler, DDIMScheduler, DPMSolverMultistepScheduler
import time
import os
from huggingface_hub import HfApi
# from compel import Compel
import torch
import sys
from pathlib import Path
import requests
from PIL import Image
from io import BytesIO

# Kept separate from the per-image output path, which previously reused the same name.
model_id = "runwayml/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"

api = HfApi()
# Recorded at start-up; nothing below reads it.
start_time = time.time()
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.load_lora_weights(lora_id)
pipe = pipe.to("cuda")

prompt = "a red sks dog"

images = pipe(
    prompt=prompt,
    num_inference_steps=15,
    cross_attention_kwargs={"scale": 0.5},
    generator=torch.manual_seed(0),
).images

# Saving fails if the target directory is missing, so create it once up front.
output_dir = os.path.join(Path.home(), "images")
os.makedirs(output_dir, exist_ok=True)

for i, image in enumerate(images):
    file_name = f"aa_{i}"
    image_path = os.path.join(output_dir, f"{file_name}.png")
    image.save(image_path)

    api.upload_file(
        path_or_fileobj=image_path,
        # basename() honours the platform separator, unlike splitting on "/".
        path_in_repo=os.path.basename(image_path),
        repo_id="patrickvonplaten/images",
        repo_type="dataset",
    )
    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
run_wuerst.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Generate a batch of 1024x1024 images with the Wuerstchen pipeline and upload them.

One image is requested per entry in ``prompt``. Each result is saved under
``~/images`` and uploaded to the ``patrickvonplaten/images`` dataset
repository.
"""
import torch
from diffusers import AutoPipelineForText2Image
from huggingface_hub import HfApi
from pathlib import Path
import os

from PIL import Image
import numpy as np

api = HfApi()

pipe = AutoPipelineForText2Image.from_pretrained("warp-diffusion/WuerstchenGeneratorPipeline", torch_dtype=torch.float16).to("cuda")

prompt = [
    "An old destroyed car standing on a cliff in norway, cinematic photography",
    "Western movie, closeup cinematic photography",
    "Pink nike shoe commercial, closeup cinematic photography",
    "Croatia, closeup cinematic photography",
    "South Tyrol mountains at sunset, closeup cinematic photography",
]


images = pipe(prompt, guidance_scale=8.0, width=1024, height=1024).images

# Saving fails if the target directory is missing, so create it once up front.
output_dir = os.path.join(Path.home(), "images")
os.makedirs(output_dir, exist_ok=True)

for i, image in enumerate(images):
    file_name = f"bb_1_{i}"
    path = os.path.join(output_dir, f"{file_name}.png")
    image.save(path)

    api.upload_file(
        path_or_fileobj=path,
        # basename() honours the platform separator, unlike splitting on "/".
        path_in_repo=os.path.basename(path),
        repo_id="patrickvonplaten/images",
        repo_type="dataset",
    )
    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
run_xl_lora.py CHANGED
@@ -8,7 +8,10 @@ import os
8
  api = HfApi()
9
 
10
  pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
11
- pipe.load_lora_weights("./sd_xl_offset_example-lora_1.0.safetensors")
 
 
 
12
  pipe.to(torch_dtype=torch.float16)
13
  pipe.to("cuda")
14
 
 
8
  api = HfApi()
9
 
10
  pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
11
+ pipe.load_lora_weights("stabilityai/stable-diffusion-xl-base-1.0", weight_name="sd_xl_offset_example-lora_1.0.safetensors")
12
+ # pipe.unet.fuse_lora()
13
+ # 7.8 it/s to beat
14
+ #
15
  pipe.to(torch_dtype=torch.float16)
16
  pipe.to("cuda")
17
 
sd_xl_inpaint.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Inpaint an example image, then refine it in two further passes and upload it.

Stages, in order:
1. Inpaint with ``runwayml/stable-diffusion-inpainting`` (20 steps).
2. Resize the result to 1024x1024 and inpaint again with
   ``stabilityai/stable-diffusion-xl-refiner-1.0`` (strength 0.2).
3. Build an image-to-image pipeline from the refiner pipeline and run one
   more pass without a mask (strength 0.2).

The final image is saved under ``~/images/ediffi_sdxl`` and uploaded to the
``patrickvonplaten/images`` dataset repository.
"""
from diffusers import AutoPipelineForInpainting, AutoPipelineForImage2Image
from diffusers.utils import load_image
import torch
from pathlib import Path
import os
from huggingface_hub import HfApi

torch.backends.cuda.matmul.allow_tf32 = True
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
api = HfApi()

pipe = AutoPipelineForInpainting.from_pretrained("runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)

pipe = pipe.to(torch_device)
pipe.enable_xformers_memory_efficient_attention()

img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

image = load_image(img_url)
mask_image = load_image(mask_url)

prompt = "dslr photography of an empty bench, high quality"
# Use the selected device everywhere instead of hard-coding "cuda" after
# having computed torch_device above.
generator = torch.Generator(device=torch_device).manual_seed(0)

image = pipe(
    prompt=prompt,
    image=image,
    mask_image=mask_image,
    guidance_scale=8.0,
    num_inference_steps=20,
    generator=generator,
).images[0]


image = image.resize((1024, 1024))

pipe = AutoPipelineForInpainting.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
pipe.to(torch_device)

pipe.enable_xformers_memory_efficient_attention()

image = pipe(
    prompt=prompt,
    image=image,
    mask_image=mask_image,
    guidance_scale=8.0,
    num_inference_steps=100,
    strength=0.2,
    generator=generator,
).images[0]

pipe = AutoPipelineForImage2Image.from_pipe(pipe)
pipe.enable_xformers_memory_efficient_attention()

image = pipe(
    prompt=prompt,
    image=image,
    guidance_scale=8.0,
    num_inference_steps=100,
    strength=0.2,
    generator=generator,
).images[0]

file_name = "aaa"
path = os.path.join(Path.home(), "images", "ediffi_sdxl", f"{file_name}.png")
# Saving fails if the target directory is missing, so create it up front.
os.makedirs(os.path.dirname(path), exist_ok=True)
image.save(path)

api.upload_file(
    path_or_fileobj=path,
    # basename() honours the platform separator, unlike splitting on "/".
    path_in_repo=os.path.basename(path),
    repo_id="patrickvonplaten/images",
    repo_type="dataset",
)
print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
train_unet.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Run one forward/backward pass through the SDXL UNet and print peak CUDA memory.

The UNet is loaded in float16, put in training mode with gradient
checkpointing enabled, and fed random inputs repeated along the batch axis.
"""
from diffusers import UNet2DConditionModel
import torch

DEVICE = "cuda:1"
BATCH_SIZE = 2

torch.cuda.set_per_process_memory_fraction(0.5, device=DEVICE)

unet = UNet2DConditionModel.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    subfolder="unet",
    variant="fp16",
    torch_dtype=torch.float16,
)
unet.train()
unet.enable_gradient_checkpointing()
unet = unet.to(DEVICE)


def _half_batch(single, *tail_repeats):
    """Cast a single-item tensor to half, move it to the UNet's device and tile it to BATCH_SIZE."""
    return single.half().to(unet.device).repeat(BATCH_SIZE, *tail_repeats)


sample = _half_batch(torch.randn((1, 4, 128, 128)), 1, 1, 1)
time_ids = _half_batch((torch.arange(6) / 6)[None, :], 1)
encoder_hidden_states = _half_batch(torch.randn((1, 77, 2048)), 1, 1)
text_embeds = _half_batch(torch.randn((1, 1280)), 1)

added_cond = {"time_ids": time_ids, "text_embeds": text_embeds}
out = unet(
    sample,
    1.0,
    added_cond_kwargs=added_cond,
    encoder_hidden_states=encoder_hidden_states,
).sample

# Mean squared error between the prediction and the input sample.
loss = ((out - sample) ** 2).mean()
loss.backward()

print(torch.cuda.max_memory_allocated(device=unet.device))