lzy-tony committed • d3653d5
Parent(s): cca14a1

feat: add english ver

Files changed:
- .gitignore +130 -0
- README.md +5 -7
- app.py +616 -0
- assets/Arial.ttf +0 -0
- assets/chinese_char.txt +1000 -0
- assets/color_idx.json +1 -0
- assets/font_idx_512.json +1 -0
- assets/multilingual_cn-en_font_idx.json +1 -0
- checkpoints/glyph-sdxl/byt5_mapper.pt +3 -0
- checkpoints/glyph-sdxl/byt5_model.pt +3 -0
- checkpoints/glyph-sdxl/optimizer.bin +3 -0
- checkpoints/glyph-sdxl/scaler.pt +3 -0
- checkpoints/glyph-sdxl/scheduler.bin +3 -0
- checkpoints/glyph-sdxl/unet_inserted_attn.pt +3 -0
- checkpoints/glyph-sdxl/unet_lora.pt +3 -0
- configs/glyph_multilingual_sdxl_albedo.py +96 -0
- configs/glyph_sdxl.py +96 -0
- configs/glyph_sdxl_albedo.py +96 -0
- demo/constants.py +2 -0
- examples/easter.json +43 -0
- examples/easter.png +0 -0
- examples/new_year.json +54 -0
- examples/new_year.png +0 -0
- examples/pancake.json +67 -0
- examples/pancake.png +0 -0
- examples/shower.json +76 -0
- examples/shower.png +0 -0
- glyph_sdxl/custom_diffusers/__init__.py +2 -0
- glyph_sdxl/custom_diffusers/models/__init__.py +3 -0
- glyph_sdxl/custom_diffusers/models/cross_attn_insert_transformer_blocks.py +377 -0
- glyph_sdxl/custom_diffusers/pipelines/__init__.py +5 -0
- glyph_sdxl/custom_diffusers/pipelines/pipeline_stable_diffusion_glyph_xl.py +922 -0
- glyph_sdxl/modules/__init__.py +7 -0
- glyph_sdxl/modules/byt5_block_byt5_mapper.py +151 -0
- glyph_sdxl/modules/simple_byt5_mapper.py +16 -0
- glyph_sdxl/utils/__init__.py +23 -0
- glyph_sdxl/utils/constants.py +5 -0
- glyph_sdxl/utils/format_prompt.py +113 -0
- glyph_sdxl/utils/load_pretrained_byt5.py +60 -0
- glyph_sdxl/utils/parse_config.py +17 -0
- requirements.txt +10 -0
.gitignore
ADDED
@@ -0,0 +1,130 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/en/_build/
docs/zh_cn/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.DS_Store

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

data
.vscode
.vscode/settings.json
.idea

# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/

# Pytorch

weights
wandb
temp
test.py
debug
*.html
htmls
debug.png

canva.fonts.json
README.md
CHANGED
@@ -1,12 +1,10 @@
 ---
-title: Glyph
-emoji:
-colorFrom:
-colorTo:
+title: Glyph-SDXL-debug
+emoji: 🖼️🖌️
+colorFrom: yellow
+colorTo: yellow
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.27.0
 app_file: app.py
 pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,616 @@
import json
import webcolors
import spaces
import gradio as gr
import os.path as osp
from PIL import Image, ImageDraw, ImageFont

import torch
from diffusers import UNet2DConditionModel, AutoencoderKL
from diffusers.models.attention import BasicTransformerBlock
from peft import LoraConfig
from peft.utils import set_peft_model_state_dict
from transformers import PretrainedConfig

from diffusers import DPMSolverMultistepScheduler

from glyph_sdxl.utils import (
    parse_config,
    UNET_CKPT_NAME,
    huggingface_cache_dir,
    load_byt5_and_byt5_tokenizer,
    BYT5_MAPPER_CKPT_NAME,
    INSERTED_ATTN_CKPT_NAME,
    BYT5_CKPT_NAME,
    PromptFormat,
)
from glyph_sdxl.custom_diffusers import (
    StableDiffusionGlyphXLPipeline,
    CrossAttnInsertBasicTransformerBlock,
)
from glyph_sdxl.modules import T5EncoderBlockByT5Mapper

byt5_mapper_dict = [T5EncoderBlockByT5Mapper]
byt5_mapper_dict = {mapper.__name__: mapper for mapper in byt5_mapper_dict}

from demo.constants import MAX_TEXT_BOX


html = f"""<h1>Glyph-ByT5: A Customized Text Encoder for Accurate Visual Text Rendering</h1>
<h2><a href='https://glyph-byt5.github.io/'>Project Page</a> | <a href='https://arxiv.org/abs/2403.09622'>arXiv Paper</a> | <a href=''>Github</a> | <a href=''>Cite our work</a> if our ideas inspire you.</h2>
<p><b>Try some examples at the bottom of the page to get started!</b></p>
<p><b>Usage:</b></p>
<p>1. <b>Select bounding boxes</b> on the canvas on the left <b>by clicking twice</b>. </p>
<p>2. Click "Redo" if you want to cancel last point, "Undo" for clearing the canvas. </p>
<p>3. <b>Click "I've finished my layout!"</b> to start choosing specific prompts, colors and font-types. </p>
<p>4. Enter a <b>design prompt</b> for the background image. Optionally, you can choose to specify the design categories and tags (separated by a comma). </p>
<p>5. For each text box, <b>enter the text prompts in the text box</b> on the left, and <b>select colors and font-types from the drop boxes</b> on the right. </p>
<p>6. <b>Click on "I've finished my texts, colors and styles, generate!"</b> to start generating!. </p>
<style>.btn {{flex-grow: unset !important;}} </p>
"""


css = '''
#color-bg{display:flex;justify-content: center;align-items: center;}
.color-bg-item{width: 100%; height: 32px}
#main_button{width:100%}
<style>
'''

state = 0
stack = []
font = ImageFont.truetype("assets/Arial.ttf", 20)

device = "cuda"

def import_model_class_from_model_name_or_path(
    pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder",
):
    text_encoder_config = PretrainedConfig.from_pretrained(
        pretrained_model_name_or_path,
        subfolder=subfolder,
        revision=revision,
    )
    model_class = text_encoder_config.architectures[0]

    if model_class == "CLIPTextModel":
        from transformers import CLIPTextModel

        return CLIPTextModel
    elif model_class == "CLIPTextModelWithProjection":
        from transformers import CLIPTextModelWithProjection

        return CLIPTextModelWithProjection
    else:
        raise ValueError(f"{model_class} is not supported.")

config = parse_config('configs/glyph_sdxl_albedo.py')
ckpt_dir = 'checkpoints/glyph-sdxl'

text_encoder_cls_one = import_model_class_from_model_name_or_path(
    config.pretrained_model_name_or_path, config.revision,
)
text_encoder_cls_two = import_model_class_from_model_name_or_path(
    config.pretrained_model_name_or_path, config.revision, subfolder="text_encoder_2",
)
text_encoder_one = text_encoder_cls_one.from_pretrained(
    config.pretrained_model_name_or_path, subfolder="text_encoder", revision=config.revision,
    cache_dir=huggingface_cache_dir,
)
text_encoder_two = text_encoder_cls_two.from_pretrained(
    config.pretrained_model_name_or_path, subfolder="text_encoder_2", revision=config.revision,
    cache_dir=huggingface_cache_dir,
)

unet = UNet2DConditionModel.from_pretrained(
    config.pretrained_model_name_or_path,
    subfolder="unet",
    revision=config.revision,
    cache_dir=huggingface_cache_dir,
)

vae_path = (
    config.pretrained_model_name_or_path
    if config.pretrained_vae_model_name_or_path is None
    else config.pretrained_vae_model_name_or_path
)
vae = AutoencoderKL.from_pretrained(
    vae_path, subfolder="vae" if config.pretrained_vae_model_name_or_path is None else None,
    revision=config.revision,
    cache_dir=huggingface_cache_dir,
)

byt5_model, byt5_tokenizer = load_byt5_and_byt5_tokenizer(
    **config.byt5_config,
    huggingface_cache_dir=huggingface_cache_dir,
)

inference_dtype = torch.float32
if config.inference_dtype == "fp16":
    inference_dtype = torch.float16
elif config.inference_dtype == "bf16":
    inference_dtype = torch.bfloat16

inserted_new_modules_para_set = set()
for name, module in unet.named_modules():
    if isinstance(module, BasicTransformerBlock) and name in config.attn_block_to_modify:
        parent_module = unet
        for n in name.split(".")[:-1]:
            parent_module = getattr(parent_module, n)
        new_block = CrossAttnInsertBasicTransformerBlock.from_transformer_block(
            module,
            byt5_model.config.d_model if config.byt5_mapper_config.sdxl_channels is None else config.byt5_mapper_config.sdxl_channels,
        )
        new_block.requires_grad_(False)
        for inserted_module_name, inserted_module in zip(
            new_block.get_inserted_modules_names(),
            new_block.get_inserted_modules()
        ):
            inserted_module.requires_grad_(True)
            for para_name, para in inserted_module.named_parameters():
                para_key = name + '.' + inserted_module_name + '.' + para_name
                assert para_key not in inserted_new_modules_para_set
                inserted_new_modules_para_set.add(para_key)
        for origin_module in new_block.get_origin_modules():
            origin_module.to(dtype=inference_dtype)
        parent_module.register_module(name.split(".")[-1], new_block)
        print(f"inserted cross attn block to {name}")

byt5_mapper = byt5_mapper_dict[config.byt5_mapper_type](
    byt5_model.config,
    **config.byt5_mapper_config,
)

unet_lora_target_modules = [
    "attn1.to_k", "attn1.to_q", "attn1.to_v", "attn1.to_out.0",
    "attn2.to_k", "attn2.to_q", "attn2.to_v", "attn2.to_out.0",
]
unet_lora_config = LoraConfig(
    r=config.unet_lora_rank,
    lora_alpha=config.unet_lora_rank,
    init_lora_weights="gaussian",
    target_modules=unet_lora_target_modules,
)
unet.add_adapter(unet_lora_config)

unet_lora_layers_para = torch.load(osp.join(ckpt_dir, UNET_CKPT_NAME), map_location='cpu')
incompatible_keys = set_peft_model_state_dict(unet, unet_lora_layers_para, adapter_name="default")
if getattr(incompatible_keys, 'unexpected_keys', []) == []:
    print(f"loaded unet_lora_layers_para")
else:
    print(f"unet_lora_layers has unexpected_keys: {getattr(incompatible_keys, 'unexpected_keys', None)}")

inserted_attn_module_paras = torch.load(osp.join(ckpt_dir, INSERTED_ATTN_CKPT_NAME), map_location='cpu')
missing_keys, unexpected_keys = unet.load_state_dict(inserted_attn_module_paras, strict=False)
assert len(unexpected_keys) == 0, unexpected_keys

byt5_mapper_para = torch.load(osp.join(ckpt_dir, BYT5_MAPPER_CKPT_NAME), map_location='cpu')
byt5_mapper.load_state_dict(byt5_mapper_para)

byt5_model_para = torch.load(osp.join(ckpt_dir, BYT5_CKPT_NAME), map_location='cpu')
byt5_model.load_state_dict(byt5_model_para)

pipeline = StableDiffusionGlyphXLPipeline.from_pretrained(
    config.pretrained_model_name_or_path,
    vae=vae,
    text_encoder=text_encoder_one,
    text_encoder_2=text_encoder_two,
    byt5_text_encoder=byt5_model,
    byt5_tokenizer=byt5_tokenizer,
    byt5_mapper=byt5_mapper,
    unet=unet,
    byt5_max_length=config.byt5_max_length,
    revision=config.revision,
    torch_dtype=inference_dtype,
    safety_checker=None,
    cache_dir=huggingface_cache_dir,
)

pipeline.scheduler = DPMSolverMultistepScheduler.from_pretrained(
    config.pretrained_model_name_or_path,
    subfolder="scheduler",
    use_karras_sigmas=True,
)

prompt_format = PromptFormat()

def get_pixels(
    box_sketch_template,
    evt: gr.SelectData
):
    global state
    global stack

    text_position = evt.index

    if state == 0:
        stack.append(text_position)
        state = 1
    else:
        x, y = stack.pop()
        stack.append([x, y, text_position[0], text_position[1]])
        state = 0

    print(stack)

    box_sketch_template = Image.new('RGB', (1024, 1024), (255, 255, 255))
    draw = ImageDraw.Draw(box_sketch_template)

    for i, text_position in enumerate(stack):
        if len(text_position) == 2:
            x, y = text_position
            r = 4
            leftUpPoint = (x-r, y-r)
            rightDownPoint = (x+r, y+r)

            text_color = (255, 0, 0)
            draw.text((x+2, y), str(i + 1), font=font, fill=text_color)

            draw.ellipse((leftUpPoint,rightDownPoint), fill='red')
        elif len(text_position) == 4:
            x0, y0, x1, y1 = text_position
            x0, x1 = min(x0, x1), max(x0, x1)
            y0, y1 = min(y0, y1), max(y0, y1)
            r = 4
            leftUpPoint = (x0-r, y0-r)
            rightDownPoint = (x0+r, y0+r)

            text_color = (255, 0, 0)
            draw.text((x0+2, y0), str(i + 1), font=font, fill=text_color)

            draw.rectangle((x0, y0, x1, y1), outline=(255, 0, 0))

    return box_sketch_template

def exe_redo(
    box_sketch_template
):
    global state
    global stack

    state = 1 - state
    if len(stack[-1]) == 2:
        stack = stack[:-1]
    else:
        x, y, _, _ = stack[-1]
        stack = stack[:-1] + [[x, y]]

    box_sketch_template = Image.new('RGB', (1024, 1024), (255, 255, 255))
    draw = ImageDraw.Draw(box_sketch_template)

    for i, text_position in enumerate(stack):
        if len(text_position) == 2:
            x, y = text_position
            r = 4
            leftUpPoint = (x-r, y-r)
            rightDownPoint = (x+r, y+r)

            text_color = (255, 0, 0)
            draw.text((x+2, y), str(i+1), font=font, fill=text_color)

            draw.ellipse((leftUpPoint, rightDownPoint), fill='red')
        elif len(text_position) == 4:
            x0, y0, x1, y1 = text_position
            x0, x1 = min(x0, x1), max(x0, x1)
            y0, y1 = min(y0, y1), max(y0, y1)
            r = 4
            leftUpPoint = (x0-r, y0-r)
            rightDownPoint = (x0+r, y0+r)

            text_color = (255, 0, 0)
            draw.text((x0+2, y0), str(i+1), font=font, fill=text_color)

            draw.rectangle((x0,y0,x1,y1), outline=(255, 0, 0))

    return box_sketch_template

def exe_undo(
    box_sketch_template
):
    global state
    global stack

    state = 0
    stack = []
    box_sketch_template = Image.new('RGB', (1024, 1024), (255, 255, 255))

    return box_sketch_template

def process_box():
    global stack
    global state

    visibilities = []
    for _ in range(MAX_TEXT_BOX + 1):
        visibilities.append(gr.update(visible=False))
    for n in range(len(stack) + 1):
        visibilities[n] = gr.update(visible=True)

    # return [gr.update(visible=True), binary_matrixes, *visibilities, *colors]
    return [gr.update(visible=True), *visibilities]

@spaces.GPU
def generate_image(bg_prompt, bg_class, bg_tags, seed, *conditions):
    print(conditions)

    # 0 load model to cuda
    global pipeline
    if config.pretrained_vae_model_name_or_path is None:
        vae.to(device, dtype=torch.float32)
    else:
        vae.to(device, dtype=inference_dtype)
    text_encoder_one.to(device, dtype=inference_dtype)
    text_encoder_two.to(device, dtype=inference_dtype)
    byt5_model.to(device)
    unet.to(device, dtype=inference_dtype)
    pipeline = pipeline.to(device)

    # 1. parse input
    global state
    global stack

    prompts = []
    colors = []
    font_type = []
    bboxes = []
    num_boxes = len(stack) if len(stack[-1]) == 4 else len(stack) - 1
    for i in range(num_boxes):
        prompts.append(conditions[i])
        colors.append(conditions[i + MAX_TEXT_BOX])
        font_type.append(conditions[i + MAX_TEXT_BOX * 2])

    # 2. input check
    styles = []
    if bg_prompt == "" or bg_prompt is None:
        raise gr.Error("Empty background prompt!")
    for i, (prompt, color, style) in enumerate(zip(prompts, colors, font_type)):
        if prompt == "" or prompt is None:
            raise gr.Error(f"Invalid prompt for text box {i + 1} !")
        if color is None:
            raise gr.Error(f"Invalid color for text box {i + 1} !")
        if style is None:
            raise gr.Error(f"Invalid style for text box {i + 1} !")
        bboxes.append(
            [
                stack[i][0] / 1024,
                stack[i][1] / 1024,
                (stack[i][2] - stack[i][0]) / 1024,
                (stack[i][3] - stack[i][1]) / 1024,
            ]
        )
        styles.append(
            {
                'color': webcolors.name_to_hex(color),
                'font-family': style,
            }
        )

    # 3. format input
    if bg_class != "" and bg_class is not None:
        bg_prompt = bg_class + ". " + bg_prompt
    if bg_tags != "" and bg_tags is not None:
        bg_prompt += " Tags: " + bg_tags
    text_prompt = prompt_format.format_prompt(prompts, styles)

    print(bg_prompt)
    print(text_prompt)

    # 4. inference
    if seed == -1:
        generator = torch.Generator(device=device)
    else:
        generator = torch.Generator(device=device).manual_seed(seed)
    with torch.cuda.amp.autocast():
        image = pipeline(
            prompt=bg_prompt,
            text_prompt=text_prompt,
            texts=prompts,
            bboxes=bboxes,
            num_inference_steps=50,
            generator=generator,
            text_attn_mask=None,
        ).images[0]
    return image

def process_example(bg_prompt, bg_class, bg_tags, color_str, style_str, text_str, box_str, seed):
    global stack
    global state

    colors = color_str.split(",")
    styles = style_str.split(",")
    boxes = box_str.split(";")
    prompts = text_str.split("**********")
    colors = [color.strip() for color in colors]
    styles = [style.strip() for style in styles]
    colors += [None] * (MAX_TEXT_BOX - len(colors))
    styles += [None] * (MAX_TEXT_BOX - len(styles))
    prompts += [""] * (MAX_TEXT_BOX - len(prompts))

    state = 0
    stack = []
    print(boxes)
    for box in boxes:
        print(box)
        box = box.strip()[1:-1]
        print(box)
        box = box.split(",")
        print(box)
        x = eval(box[0].strip()) * 1024
        y = eval(box[1].strip()) * 1024
        w = eval(box[2].strip()) * 1024
        h = eval(box[3].strip()) * 1024
        stack.append([int(x), int(y), int(x + w + 0.5), int(y + h + 0.5)])

    visibilities = []
    for _ in range(MAX_TEXT_BOX + 1):
        visibilities.append(gr.update(visible=False))
    for n in range(len(stack) + 1):
        visibilities[n] = gr.update(visible=True)

    box_sketch_template = Image.new('RGB', (1024, 1024), (255, 255, 255))
    draw = ImageDraw.Draw(box_sketch_template)

    for i, text_position in enumerate(stack):
        if len(text_position) == 2:
            x, y = text_position
            r = 4
            leftUpPoint = (x-r, y-r)
            rightDownPoint = (x+r, y+r)

            text_color = (255, 0, 0)
            draw.text((x+2, y), str(i + 1), font=font, fill=text_color)

            draw.ellipse((leftUpPoint,rightDownPoint), fill='red')
        elif len(text_position) == 4:
            x0, y0, x1, y1 = text_position
            x0, x1 = min(x0, x1), max(x0, x1)
            y0, y1 = min(y0, y1), max(y0, y1)
            r = 4
            leftUpPoint = (x0-r, y0-r)
            rightDownPoint = (x0+r, y0+r)

            text_color = (255, 0, 0)
            draw.text((x0+2, y0), str(i + 1), font=font, fill=text_color)

            draw.rectangle((x0, y0, x1, y1), outline=(255, 0, 0))

    return [
        gr.update(visible=True), box_sketch_template, seed, *visibilities, *colors, *styles, *prompts,
    ]

def main():
    # load configs
    with open('assets/color_idx.json', 'r') as f:
        color_idx_dict = json.load(f)
    color_idx_list = list(color_idx_dict)
    with open('assets/font_idx_512.json', 'r') as f:
        font_idx_dict = json.load(f)
    font_idx_list = list(font_idx_dict)

    with gr.Blocks(
        title="Glyph-ByT5: A Customized Text Encoder for Accurate Visual Text Rendering",
        css=css,
    ) as demo:
        gr.HTML(html)
        with gr.Row():
            with gr.Column(elem_id="main-image"):
                box_sketch_template = gr.Image(
                    value=Image.new('RGB', (1024, 1024), (255, 255, 255)),
                    sources=[],
                    interactive=False,
                )

                box_sketch_template.select(get_pixels, [box_sketch_template], [box_sketch_template])

                with gr.Row():
                    redo = gr.Button(value='Redo - Cancel last point')
                    undo = gr.Button(value='Undo - Clear the canvas')
                redo.click(exe_redo, [box_sketch_template], [box_sketch_template])
                undo.click(exe_undo, [box_sketch_template], [box_sketch_template])

                button_layout = gr.Button("(1) I've finished my layout!", elem_id="main_button", interactive=True)

                prompts = []
                colors = []
                styles = []
                color_row = [None] * (MAX_TEXT_BOX + 1)
                with gr.Column(visible=False) as post_box:
                    for n in range(MAX_TEXT_BOX + 1):
                        if n == 0 :
                            with gr.Row(visible=True) as color_row[n]:
                                bg_prompt = gr.Textbox(label="Design prompt for the background image", value="")
                                bg_class = gr.Textbox(label="Design type for the background image (optional)", value="")
                                bg_tags = gr.Textbox(label="Design type for the background image (optional)", value="")
                        else:
                            with gr.Row(visible=False) as color_row[n]:
                                prompts.append(gr.Textbox(label="Prompt for box "+str(n)))
                                colors.append(gr.Dropdown(
                                    label="Color for box "+str(n),
                                    choices=color_idx_list,
                                ))
                                styles.append(gr.Dropdown(
                                    label="Font type for box "+str(n),
                                    choices=font_idx_list,
                                ))

                seed_ = gr.Slider(label="Seed", minimum=-1, maximum=999999999, value=-1, step=1)
                button_generate = gr.Button("(2) I've finished my texts, colors and styles, generate!", elem_id="main_button", interactive=True)

                button_layout.click(process_box, inputs=[], outputs=[post_box, *color_row], queue=False)

            with gr.Column():
                output_image = gr.Image(label="Output Image", interactive=False)

                button_generate.click(generate_image, inputs=[bg_prompt, bg_class, bg_tags, seed_, *(prompts + colors + styles)], outputs=[output_image], queue=True)

        # examples
        color_str = gr.Textbox(label="Color list", value="", visible=False)
        style_str = gr.Textbox(label="Font type list", value="", visible=False)
        box_str = gr.Textbox(label="Bbox list", value="", visible=False)
        text_str = gr.Textbox(label="Text list", value="", visible=False)

        gr.Examples(
            examples=[
                [
                    'The image features a small bunny rabbit sitting in a basket filled with various flowers. The basket is placed on a yellow background, creating a vibrant and cheerful scene. The flowers surrounding the rabbit come in different sizes and colors, adding to the overall visual appeal of the image. The rabbit appears to be the main focus of the scene, and its presence among the flowers creates a sense of harmony and balance.',
                    'Facebook Post',
                    'green, yellow, minimalist, easter day, happy easter day, easter, happy easter, decoration, happy, egg, spring, selebration, poster, illustration, greeting, season, design, colorful, cute, template',
                    'darkolivegreen, darkolivegreen, darkolivegreen',
                    'Gagalin-Regular, Gagalin-Regular, Brusher-Regular',
                    'MAY ALLYOUR PRAYERS BE ANSWERED**********HAVE A HAPPY**********Easter Day',
                    '[0.08267477203647416, 0.5355623100303951, 0.42857142857142855, 0.07477203647416414]; [0.08389057750759879, 0.1951367781155015, 0.38054711246200607, 0.03768996960486322]; [0.07537993920972644, 0.2601823708206687, 0.49544072948328266, 0.14650455927051673]',
                    1,
                ],
                [
                    'The image features a large gray elephant sitting in a field of flowers, holding a smaller elephant in its arms. The scene is quite serene and picturesque, with the two elephants being the main focus of the image. The field is filled with various flowers, creating a beautiful and vibrant backdrop for the elephants.',
                    'Cards and invitations',
                    'Light green, orange, Illustration, watercolor, playful, Baby shower invitation, baby boy shower invitation, baby boy, welcoming baby boy, koala baby shower invitation, baby shower invitation for baby shower, baby boy invitation, background, playful baby shower card, baby shower, card, newborn, born, Baby Shirt Baby Shower Invitation',
                    'peru, olive, olivedrab, peru, peru, peru',
                    'LilitaOne, Sensei-Medium, Sensei-Medium, LilitaOne, LilitaOne, LilitaOne',
                    "RSVP to +123-456-7890**********Olivia Wilson**********Baby Shower**********Please Join Us For a**********In Honoring**********23 November, 2021 | 03:00 PM Fauget Hotels",
                    '[0.07112462006079028, 0.6462006079027356, 0.3373860182370821, 0.026747720364741642]; [0.07051671732522796, 0.38662613981762917, 0.37264437689969604, 0.059574468085106386]; [0.07234042553191489, 0.15623100303951368, 0.6547112462006079, 0.12401215805471125]; [0.0662613981762918, 0.06747720364741641, 0.3981762917933131, 0.035866261398176294]; [0.07051671732522796, 0.31550151975683893, 0.22006079027355624, 0.03951367781155015]; [0.06990881458966565, 0.48328267477203646, 0.39878419452887537, 0.1094224924012158]',
                    0,
                ],
                [
                    'The image features a white background with a variety of colorful flowers and decorations. There are several pink flowers scattered throughout the scene, with some positioned closer to the top and others near the bottom. A blue flower can also be seen in the middle of the image. The overall composition creates a visually appealing and vibrant display.',
                    'Instagram Posts',
                    'grey, navy, purple, pink, teal, colorful, illustration, happy, celebration, post, party, year, new, event, celebrate, happy new year, new year, countdown, sparkle, firework',
                    'purple, midnightblue, black, black',
                    'Caveat-Regular, Gagalin-Regular, Quicksand-Light, Quicksand-Light',
                    'Happy New Year**********2024**********All THE BEST**********A fresh start to start a change for the better.',
                    '[0.2936170212765957, 0.2887537993920973, 0.40303951367781155, 0.07173252279635259]; [0.24984802431610942, 0.3951367781155015, 0.46200607902735563, 0.17203647416413373]; [0.3951367781155015, 0.1094224924012158, 0.2109422492401216, 0.02796352583586626]; [0.20911854103343466, 0.6127659574468085, 0.5586626139817629, 0.08085106382978724]',
                    1,
                ],
                [
                    'The image features a stack of pancakes with syrup and strawberries on top. The pancakes are arranged in a visually appealing manner, with some pancakes placed on top of each other. The syrup is drizzled generously over the pancakes, and the strawberries are scattered around, adding a touch of color and freshness to the scene. The overall presentation of the pancakes is appetizing and inviting.',
                    'Instagram Posts',
                    'brown, peach, grey, modern, minimalist, simple, colorful, illustration, Instagram post, instagram, post, national pancake day, international pancake day, happy pancake day, pancake day, pancake, sweet, cake, discount, sale',
                    'dimgray, white, darkolivegreen',
                    'MoreSugarRegular, Chewy-Regular, Chewy-Regular',
                    'Get 75% Discount for your first order**********Order Now**********National Pancake Day',
                    '[0.043161094224924014, 0.5963525835866261, 0.2936170212765957, 0.08389057750759879]; [0.12279635258358662, 0.79209726443769, 0.26382978723404255, 0.05167173252279635]; [0.044984802431610946, 0.09787234042553192, 0.4413373860182371, 0.4158054711246201]',
                    1,
                ]
            ],
            inputs=[
                bg_prompt,
                bg_class,
                bg_tags,
                color_str,
                style_str,
                text_str,
                box_str,
                seed_,
            ],
            outputs=[post_box, box_sketch_template, seed_, *color_row, *colors, *styles, *prompts],
            fn=process_example,
            run_on_click=True,
            label='Examples',
        )

    demo.queue()
    demo.launch()

if __name__ == "__main__":
    main()
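For reference, the hidden "Bbox list" example strings above encode each text box as "[x, y, w, h]" in normalized 0-1 page coordinates, joined by ";". Below is a minimal standalone sketch of the decoding that process_example() performs; parse_box_str is a hypothetical helper (not part of this commit), and it substitutes float() for the eval() calls in app.py, which is behavior-equivalent for these numeric inputs:

def parse_box_str(box_str):
    # Decode '[x, y, w, h]; [x, y, w, h]; ...' (normalized 0-1 floats)
    # into pixel-space corner boxes on the 1024x1024 canvas.
    stack = []
    for box in box_str.split(';'):
        x, y, w, h = (float(v) * 1024 for v in box.strip()[1:-1].split(','))
        # round the far corner, as int(x + w + 0.5) does in process_example()
        stack.append([int(x), int(y), int(x + w + 0.5), int(y + h + 0.5)])
    return stack

print(parse_box_str('[0.0826, 0.5355, 0.4285, 0.0747]; [0.0838, 0.1951, 0.3805, 0.0376]'))
# [[84, 548, 523, 625], [85, 199, 475, 238]]

generate_image() later divides these pixel coordinates back by 1024, so the bboxes handed to the pipeline are again normalized [x, y, w, h] values.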
assets/Arial.ttf
ADDED
Binary file (276 kB).
assets/chinese_char.txt
ADDED
@@ -0,0 +1,1000 @@
(1,000 characters, one per line in the file)
的一是不了在人有我他这个们中来上大为和国
地到以说时要就出会可也你对生能而子那得于
着下自之年过发后作里用道行所然家种事成方
多经么去法学如都同现当没动面起看定天分还
进好小部其些主样理心她本前开但因只从想实
日军者意无力它与长把机十民第公此已工使情
明性知全三又关点正业外将两高间由问很最重
并物手应战向头文体政美相见被利什二等产或
新己制身果加西斯月话合回特代内信表化老给
世位次度门任常先海通教儿原东声提立及比员
解水名真论处走义各入几口认条平系气题活尔
更别打女变四神总何电数安少报才结反受目太
量再感建务做接必场件计管期市直德资命山金
指克许统区保至队形社便空决治展马科司五基
眼书非则听白却界达光放强即像难且权思王象
完设式色路记南品住告类求据程北边死张该交
规万取拉格望觉术领共确传师观清今切院让识
候带导争运笑飞风步改收根干造言联持组每济
车亲极林服快办议往元英士证近失转夫令准布
始怎呢存未远叫台单影具罗字爱击流备兵连调
深商算质团集百需价花党华城石级整府离况亚
请技际约示复病息究线似官火断精满支视消越
器容照须九增研写称企八功吗包片史委乎查轻
易早曾除农找装广显吧阿李标谈吃图念六引历
首医局突专费号尽另周较注语仅考落青随选列
武红响虽推势参希古众构房半节土投某案黑维
革划敌致陈律足态护七兴派孩验责营星够章音
跟志底站严巴例防族供效续施留讲型料终答紧
黄绝奇察母京段依批群项故按河米围江织害斗
双境客纪采举杀攻父苏密低朝友诉止细愿千值
仍男钱破网热助倒育属坐帝限船脸职速刻乐否
刚威毛状率甚独球般普怕弹校苦创假久错承印
晚兰试股拿脑预谁益阳若哪微尼继送急血惊伤
素药适波夜省初喜卫源食险待述陆习置居劳财
环排福纳欢雷警获模充负云停木游龙树疑层冷
洲冲射略范竟句室异激汉村哈策演简卡罪判担
州静退既衣您宗积余痛检差富灵协角占配征修
皮挥胜降阶审沉坚善妈刘读啊超免压银买皇养
伊怀执副乱抗犯追帮宣佛岁航优怪香著田铁控
税左右份穿艺背阵草脚概恶块顿敢守酒岛托央
户烈洋哥索胡款靠评版宝座释景顾弟登货互付
伯慢欧换闻危忙核暗姐介坏讨丽良序升监临亮
露永呼味野架域沙掉括舰鱼杂误湾吉减编楚肯
测败屋跑梦散温困剑渐封救贵枪缺楼县尚毫移
娘朋画班智亦耳恩短掌恐遗固席松秘谢鲁遇康
虑幸均销钟诗藏赶剧票损忽巨炮旧端探湖录叶
春乡附吸予礼港雨呀板庭妇归睛饭额含顺输摇
招婚脱补谓督毒油疗旅泽材灭逐莫笔亡鲜词圣
择寻厂睡博勒烟授诺伦岸奥唐卖俄炸载洛健堂
旁宫喝借君禁阴园谋宋避抓荣姑孙逃牙束跳顶
assets/color_idx.json
ADDED
@@ -0,0 +1 @@
{"white": 0, "black": 1, "darkslategray": 2, "dimgray": 3, "darkolivegreen": 4, "midnightblue": 5, "saddlebrown": 6, "sienna": 7, "whitesmoke": 8, "darkslateblue": 9, "indianred": 10, "linen": 11, "maroon": 12, "khaki": 13, "sandybrown": 14, "gray": 15, "gainsboro": 16, "teal": 17, "peru": 18, "gold": 19, "snow": 20, "firebrick": 21, "crimson": 22, "chocolate": 23, "tomato": 24, "brown": 25, "goldenrod": 26, "antiquewhite": 27, "rosybrown": 28, "steelblue": 29, "floralwhite": 30, "seashell": 31, "darkgreen": 32, "oldlace": 33, "darkkhaki": 34, "burlywood": 35, "red": 36, "darkgray": 37, "orange": 38, "royalblue": 39, "seagreen": 40, "lightgray": 41, "tan": 42, "coral": 43, "beige": 44, "palevioletred": 45, "wheat": 46, "lavender": 47, "darkcyan": 48, "slateblue": 49, "slategray": 50, "orangered": 51, "silver": 52, "olivedrab": 53, "forestgreen": 54, "darkgoldenrod": 55, "ivory": 56, "darkorange": 57, "yellow": 58, "hotpink": 59, "ghostwhite": 60, "lightcoral": 61, "indigo": 62, "bisque": 63, "darkred": 64, "darksalmon": 65, "lightslategray": 66, "dodgerblue": 67, "lightpink": 68, "mistyrose": 69, "mediumvioletred": 70, "cadetblue": 71, "deeppink": 72, "salmon": 73, "palegoldenrod": 74, "blanchedalmond": 75, "lightseagreen": 76, "cornflowerblue": 77, "yellowgreen": 78, "greenyellow": 79, "navajowhite": 80, "papayawhip": 81, "mediumslateblue": 82, "purple": 83, "blueviolet": 84, "pink": 85, "cornsilk": 86, "lightsalmon": 87, "mediumpurple": 88, "moccasin": 89, "turquoise": 90, "mediumseagreen": 91, "lavenderblush": 92, "mediumblue": 93, "darkseagreen": 94, "mediumturquoise": 95, "paleturquoise": 96, "skyblue": 97, "lemonchiffon": 98, "olive": 99, "peachpuff": 100, "lightyellow": 101, "lightsteelblue": 102, "mediumorchid": 103, "plum": 104, "darkturquoise": 105, "aliceblue": 106, "mediumaquamarine": 107, "orchid": 108, "powderblue": 109, "blue": 110, "darkorchid": 111, "violet": 112, "lightskyblue": 113, "lightcyan": 114, "lightgoldenrodyellow": 115, "navy": 116, "thistle": 117, "honeydew": 118, "mintcream": 119, "lightblue": 120, "darkblue": 121, "darkmagenta": 122, "deepskyblue": 123, "magenta": 124, "limegreen": 125, "darkviolet": 126, "cyan": 127, "palegreen": 128, "aquamarine": 129, "lawngreen": 130, "lightgreen": 131, "azure": 132, "chartreuse": 133, "green": 134, "mediumspringgreen": 135, "lime": 136, "springgreen": 137}
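This table backs the per-box color dropdowns in app.py: main() loads the keys as the dropdown choices, and generate_image() converts a chosen CSS color name to hex with webcolors before passing it to the prompt formatter. A minimal sketch of that path (illustrative, not part of the commit; the color and font values are taken from the first example above):

import json
import webcolors

with open('assets/color_idx.json', 'r') as f:
    color_idx_dict = json.load(f)
color_idx_list = list(color_idx_dict)  # dropdown choices: 'white', 'black', ...

# generate_image() builds one style dict per text box:
style = {'color': webcolors.name_to_hex('darkolivegreen'), 'font-family': 'Gagalin-Regular'}
print(style)  # {'color': '#556b2f', 'font-family': 'Gagalin-Regular'}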
assets/font_idx_512.json
ADDED
@@ -0,0 +1 @@
{"Montserrat-Regular": 0, "Poppins-Italic": 1, "GlacialIndifference-Regular": 2, "OpenSans-ExtraBoldItalic": 3, "Montserrat-Bold": 4, "Now-Regular": 5, "Garet-Regular": 6, "LeagueSpartan-Bold": 7, "DMSans-Regular": 8, "OpenSauceOne-Regular": 9, "OpenSans-ExtraBold": 10, "KGPrimaryPenmanship": 11, "Anton-Regular": 12, "Aileron-BlackItalic": 13, "Quicksand-Light": 14, "Roboto-BoldItalic": 15, "TheSeasons-It": 16, "Kollektif": 17, "Inter-BoldItalic": 18, "Poppins-Medium": 19, "Poppins-Light": 20, "RoxboroughCF-RegularItalic": 21, "PlayfairDisplay-SemiBold": 22, "Agrandir-Italic": 23, "Lato-Regular": 24, "MoreSugarRegular": 25, "CanvaSans-RegularItalic": 26, "PublicSans-Italic": 27, "CodePro-NormalLC": 28, "Belleza-Regular": 29, "JosefinSans-Bold": 30, "HKGrotesk-Bold": 31, "Telegraf-Medium": 32, "BrittanySignatureRegular": 33, "Raleway-ExtraBoldItalic": 34, "Mont-RegularItalic": 35, "Arimo-BoldItalic": 36, "Lora-Italic": 37, "ArchivoBlack-Regular": 38, "Poppins": 39, "Barlow-Black": 40, "CormorantGaramond-Bold": 41, "LibreBaskerville-Regular": 42, "CanvaSchoolFontRegular": 43, "BebasNeueBold": 44, "LazydogRegular": 45, "FredokaOne-Regular": 46, "Horizon-Bold": 47, "Nourd-Regular": 48, "Hatton-Regular": 49, "Nunito-ExtraBoldItalic": 50, "CerebriSans-Regular": 51, "Montserrat-Light": 52, "TenorSans": 53, "Norwester-Regular": 54, "ClearSans-Bold": 55, "Cardo-Regular": 56, "Alice-Regular": 57, "Oswald-Regular": 58, "Gaegu-Bold": 59, "Muli-Black": 60, "TAN-PEARL-Regular": 61, "CooperHewitt-Book": 62, "Agrandir-Grand": 63, "BlackMango-Thin": 64, "DMSerifDisplay-Regular": 65, "Antonio-Bold": 66, "Sniglet-Regular": 67, "BeVietnam-Regular": 68, "NunitoSans10pt-BlackItalic": 69, "AbhayaLibre-ExtraBold": 70, "Rubik-Regular": 71, "PPNeueMachina-Regular": 72, "TAN - MON CHERI-Regular": 73, "Jua-Regular": 74, "Playlist-Script": 75, "SourceSansPro-BoldItalic": 76, "MoonTime-Regular": 77, "Eczar-ExtraBold": 78, "Gatwick-Regular": 79, "MonumentExtended-Regular": 80, "BarlowSemiCondensed-Regular": 81, "BarlowCondensed-Regular": 82, "Alegreya-Regular": 83, "DreamAvenue": 84, "RobotoCondensed-Italic": 85, "BobbyJones-Regular": 86, "Garet-ExtraBold": 87, "YesevaOne-Regular": 88, "Dosis-ExtraBold": 89, "LeagueGothic-Regular": 90, "OpenSans-Italic": 91, "TANAEGEAN-Regular": 92, "Maharlika-Regular": 93, "MarykateRegular": 94, "Cinzel-Regular": 95, "Agrandir-Wide": 96, "Chewy-Regular": 97, "BodoniFLF-BoldItalic": 98, "Nunito-BlackItalic": 99, "LilitaOne": 100, "HandyCasualCondensed-Regular": 101, "Ovo": 102, "Livvic-Regular": 103, "Agrandir-Narrow": 104, "CrimsonPro-Italic": 105, "AnonymousPro-Bold": 106, "NF-OneLittleFont-Bold": 107, "RedHatDisplay-BoldItalic": 108, "CodecPro-Regular": 109, "HalimunRegular": 110, "LibreFranklin-Black": 111, "TeXGyreTermes-BoldItalic": 112, "Shrikhand-Regular": 113, "TTNormsPro-Italic": 114, "Gagalin-Regular": 115, "OpenSans-Bold": 116, "GreatVibes-Regular": 117, "Breathing": 118, "HeroLight-Regular": 119, "KGPrimaryDots": 120, "Quicksand-Bold": 121, "Brice-ExtraLightSemiExpanded": 122, "Lato-BoldItalic": 123, "Fraunces9pt-Italic": 124, "AbrilFatface-Regular": 125, "BerkshireSwash-Regular": 126, "Atma-Bold": 127, "HolidayRegular": 128, "BebasNeueCyrillic": 129, "IntroRust-Base": 130, "Gistesy": 131, "BDScript-Regular": 132, "ApricotsRegular": 133, "Prompt-Black": 134, "TAN MERINGUE": 135, "Sukar Regular": 136, "GentySans-Regular": 137, "NeueEinstellung-Normal": 138, "Garet-Bold": 139, "FiraSans-Black": 140, "BantayogLight": 141, "NotoSerifDisplay-Black": 142, "TTChocolates-Regular": 
143, "Ubuntu-Regular": 144, "Assistant-Bold": 145, "ABeeZee-Regular": 146, "LexendDeca-Regular": 147, "KingredSerif": 148, "Radley-Regular": 149, "BrownSugar": 150, "MigraItalic-ExtraboldItalic": 151, "ChildosArabic-Regular": 152, "PeaceSans": 153, "LondrinaSolid-Black": 154, "SpaceMono-BoldItalic": 155, "RobotoMono-Light": 156, "CourierPrime-Regular": 157, "Alata-Regular": 158, "Amsterdam-One": 159, "IreneFlorentina-Regular": 160, "CatchyMager": 161, "Alta_regular": 162, "ArticulatCF-Regular": 163, "Raleway-Regular": 164, "BrasikaDisplay": 165, "TANAngleton-Italic": 166, "NotoSerifDisplay-ExtraCondensedItalic": 167, "Bryndan Write": 168, "TTCommonsPro-It": 169, "AlexBrush-Regular": 170, "Antic-Regular": 171, "TTHoves-Bold": 172, "DroidSerif": 173, "AblationRegular": 174, "Marcellus-Regular": 175, "Sanchez-Italic": 176, "JosefinSans": 177, "Afrah-Regular": 178, "PinyonScript": 179, "TTInterphases-BoldItalic": 180, "Yellowtail-Regular": 181, "Gliker-Regular": 182, "BobbyJonesSoft-Regular": 183, "IBMPlexSans": 184, "Amsterdam-Three": 185, "Amsterdam-FourSlant": 186, "TTFors-Regular": 187, "Quattrocento": 188, "Sifonn-Basic": 189, "AlegreyaSans-Black": 190, "Daydream": 191, "AristotelicaProTx-Rg": 192, "NotoSerif": 193, "EBGaramond-Italic": 194, "HammersmithOne-Regular": 195, "RobotoSlab-Regular": 196, "DO-Sans-Regular": 197, "KGPrimaryDotsLined": 198, "Blinker-Regular": 199, "TAN NIMBUS": 200, "Blueberry-Regular": 201, "Rosario-Regular": 202, "Forum": 203, "MistrullyRegular": 204, "SourceSerifPro-Regular": 205, "Bugaki-Regular": 206, "CMUSerif-Roman": 207, "GulfsDisplay-NormalItalic": 208, "PTSans-Bold": 209, "Sensei-Medium": 210, "SquadaOne-Regular": 211, "Arapey-Italic": 212, "Parisienne-Regular": 213, "Aleo-Italic": 214, "QuicheDisplay-Italic": 215, "RocaOne-It": 216, "Funtastic-Regular": 217, "PTSerif-BoldItalic": 218, "Muller-RegularItalic": 219, "ArgentCF-Regular": 220, "Brightwall-Italic": 221, "Knewave-Regular": 222, "TYSerif-D": 223, "Agrandir-Tight": 224, "AlfaSlabOne-Regular": 225, "TANTangkiwood-Display": 226, "Kief-Montaser-Regular": 227, "Gotham-Book": 228, "JuliusSansOne-Regular": 229, "CocoGothic-Italic": 230, "SairaCondensed-Regular": 231, "DellaRespira-Regular": 232, "Questrial-Regular": 233, "BukhariScript-Regular": 234, "HelveticaWorld-Bold": 235, "TANKINDRED-Display": 236, "CinzelDecorative-Regular": 237, "Vidaloka-Regular": 238, "AlegreyaSansSC-Black": 239, "FeelingPassionate-Regular": 240, "QuincyCF-Regular": 241, "FiraCode-Regular": 242, "Genty-Regular": 243, "Nickainley-Normal": 244, "RubikOne-Regular": 245, "Gidole-Regular": 246, "Borsok": 247, "Gordita-RegularItalic": 248, "Scripter-Regular": 249, "Buffalo-Regular": 250, "KleinText-Regular": 251, "Creepster-Regular": 252, "Arvo-Bold": 253, "GabrielSans-NormalItalic": 254, "Heebo-Black": 255, "LexendExa-Regular": 256, "BrixtonSansTC-Regular": 257, "GildaDisplay-Regular": 258, "ChunkFive-Roman": 259, "Amaranth-BoldItalic": 260, "BubbleboddyNeue-Regular": 261, "MavenPro-Bold": 262, "TTDrugs-Italic": 263, "CyGrotesk-KeyRegular": 264, "VarelaRound-Regular": 265, "Ruda-Black": 266, "SafiraMarch": 267, "BloggerSans": 268, "TANHEADLINE-Regular": 269, "SloopScriptPro-Regular": 270, "NeueMontreal-Regular": 271, "Schoolbell-Regular": 272, "SigherRegular": 273, "InriaSerif-Regular": 274, "JetBrainsMono-Regular": 275, "MADEEvolveSans": 276, "Dekko": 277, "Handyman-Regular": 278, "Aileron-BoldItalic": 279, "Bright-Italic": 280, "Solway-Regular": 281, "Higuen-Regular": 282, "WedgesItalic": 283, "TANASHFORD-BOLD": 284, 
"IBMPlexMono": 285, "RacingSansOne-Regular": 286, "RegularBrush": 287, "OpenSans-LightItalic": 288, "SpecialElite-Regular": 289, "FuturaLTPro-Medium": 290, "MaragsaDisplay": 291, "BigShouldersDisplay-Regular": 292, "BDSans-Regular": 293, "RasputinRegular": 294, "Yvesyvesdrawing-BoldItalic": 295, "Bitter-Regular": 296, "LuckiestGuy-Regular": 297, "CanvaSchoolFontDotted": 298, "TTFirsNeue-Italic": 299, "Sunday-Regular": 300, "HKGothic-MediumItalic": 301, "CaveatBrush-Regular": 302, "HeliosExt": 303, "ArchitectsDaughter-Regular": 304, "Angelina": 305, "Calistoga-Regular": 306, "ArchivoNarrow-Regular": 307, "ObjectSans-MediumSlanted": 308, "AyrLucidityCondensed-Regular": 309, "Nexa-RegularItalic": 310, "Lustria-Regular": 311, "Amsterdam-TwoSlant": 312, "Virtual-Regular": 313, "Brusher-Regular": 314, "NF-Lepetitcochon-Regular": 315, "TANTWINKLE": 316, "LeJour-Serif": 317, "Prata-Regular": 318, "PPWoodland-Regular": 319, "PlayfairDisplay-BoldItalic": 320, "AmaticSC-Regular": 321, "Cabin-Regular": 322, "Manjari-Bold": 323, "MrDafoe-Regular": 324, "TTRamillas-Italic": 325, "Luckybones-Bold": 326, "DarkerGrotesque-Light": 327, "BellabooRegular": 328, "CormorantSC-Bold": 329, "GochiHand-Regular": 330, "Atteron": 331, "RocaTwo-Lt": 332, "ZCOOLXiaoWei-Regular": 333, "TANSONGBIRD": 334, "HeadingNow-74Regular": 335, "Luthier-BoldItalic": 336, "Oregano-Regular": 337, "AyrTropikaIsland-Int": 338, "Mali-Regular": 339, "DidactGothic-Regular": 340, "Lovelace-Regular": 341, "BakerieSmooth-Regular": 342, "CarterOne": 343, "HussarBd": 344, "OldStandard-Italic": 345, "TAN-ASTORIA-Display": 346, "rugratssans-Regular": 347, "BMHANNA": 348, "BetterSaturday": 349, "AdigianaToybox": 350, "Sailors": 351, "PlayfairDisplaySC-Italic": 352, "Etna-Regular": 353, "Revive80Signature": 354, "CAGenerated": 355, "Poppins-Regular": 356, "Jonathan-Regular": 357, "Pacifico-Regular": 358, "Saira-Black": 359, "Loubag-Regular": 360, "Decalotype-Black": 361, "Mansalva-Regular": 362, "Allura-Regular": 363, "ProximaNova-Bold": 364, "TANMIGNON-DISPLAY": 365, "ArsenicaAntiqua-Regular": 366, "BreulGroteskA-RegularItalic": 367, "HKModular-Bold": 368, "TANNightingale-Regular": 369, "AristotelicaProCndTxt-Rg": 370, "Aprila-Regular": 371, "Tomorrow-Regular": 372, "AngellaWhite": 373, "KaushanScript-Regular": 374, "NotoSans": 375, "LeJour-Script": 376, "BrixtonTC-Regular": 377, "OleoScript-Regular": 378, "Cakerolli-Regular": 379, "Lobster-Regular": 380, "FrunchySerif-Regular": 381, "PorcelainRegular": 382, "AlojaExtended": 383, "SergioTrendy-Italic": 384, "LovelaceText-Bold": 385, "Anaktoria": 386, "JimmyScript-Light": 387, "IBMPlexSerif": 388, "Marta": 389, "Mango-Regular": 390, "Overpass-Italic": 391, "Hagrid-Regular": 392, "ElikaGorica": 393, "Amiko-Regular": 394, "EFCOBrookshire-Regular": 395, "Caladea-Regular": 396, "MoonlightBold": 397, "Staatliches-Regular": 398, "Helios-Bold": 399, "Satisfy-Regular": 400, "NexaScript-Regular": 401, "Trocchi-Regular": 402, "March": 403, "IbarraRealNova-Regular": 404, "Nectarine-Regular": 405, "Overpass-Light": 406, "TruetypewriterPolyglOTT": 407, "Bangers-Regular": 408, "Lazord-BoldExpandedItalic": 409, "Chloe-Regular": 410, "BaskervilleDisplayPT-Regular": 411, "Bright-Regular": 412, "Vollkorn-Regular": 413, "Harmattan": 414, "SortsMillGoudy-Regular": 415, "Biryani-Bold": 416, "SugoProDisplay-Italic": 417, "Lazord-BoldItalic": 418, "Alike-Regular": 419, "PermanentMarker-Regular": 420, "Sacramento-Regular": 421, "HKGroteskPro-Italic": 422, "Aleo-BoldItalic": 423, "Noot": 424, "TANGARLAND-Regular": 425, 
"Twister": 426, "Arsenal-Italic": 427, "Bogart-Italic": 428, "BethEllen-Regular": 429, "Caveat-Regular": 430, "BalsamiqSans-Bold": 431, "BreeSerif-Regular": 432, "CodecPro-ExtraBold": 433, "Pierson-Light": 434, "CyGrotesk-WideRegular": 435, "Lumios-Marker": 436, "Comfortaa-Bold": 437, "TraceFontRegular": 438, "RTL-AdamScript-Regular": 439, "EastmanGrotesque-Italic": 440, "Kalam-Bold": 441, "ChauPhilomeneOne-Regular": 442, "Coiny-Regular": 443, "Lovera": 444, "Gellatio": 445, "TitilliumWeb-Bold": 446, "OilvareBase-Italic": 447, "Catamaran-Black": 448, "Anteb-Italic": 449, "SueEllenFrancisco": 450, "SweetApricot": 451, "BrightSunshine": 452, "IM_FELL_Double_Pica_Italic": 453, "Granaina-limpia": 454, "TANPARFAIT": 455, "AcherusGrotesque-Regular": 456, "AwesomeLathusca-Italic": 457, "Signika-Bold": 458, "Andasia": 459, "DO-AllCaps-Slanted": 460, "Zenaida-Regular": 461, "Fahkwang-Regular": 462, "Play-Regular": 463, "BERNIERRegular-Regular": 464, "PlumaThin-Regular": 465, "SportsWorld": 466, "Garet-Black": 467, "CarolloPlayscript-BlackItalic": 468, "Cheque-Regular": 469, "SEGO": 470, "BobbyJones-Condensed": 471, "NexaSlab-RegularItalic": 472, "DancingScript-Regular": 473, "PaalalabasDisplayWideBETA": 474, "Magnolia-Script": 475, "OpunMai-400It": 476, "MadelynFill-Regular": 477, "ZingRust-Base": 478, "FingerPaint-Regular": 479, "BostonAngel-Light": 480, "Gliker-RegularExpanded": 481, "Ahsing": 482, "Engagement-Regular": 483, "EyesomeScript": 484, "LibraSerifModern-Regular": 485, "London-Regular": 486, "AtkinsonHyperlegible-Regular": 487, "StadioNow-TextItalic": 488, "Aniyah": 489, "ITCAvantGardePro-Bold": 490, "Comica-Regular": 491, "Coustard-Regular": 492, "Brice-BoldCondensed": 493, "TANNEWYORK-Bold": 494, "TANBUSTER-Bold": 495, "Alatsi-Regular": 496, "TYSerif-Book": 497, "Jingleberry": 498, "Rajdhani-Bold": 499, "LobsterTwo-BoldItalic": 500, "BestLight-Medium": 501, "Hitchcut-Regular": 502, "GermaniaOne-Regular": 503, "Emitha-Script": 504, "LemonTuesday": 505, "Cubao_Free_Regular": 506, "MonterchiSerif-Regular": 507, "AllertaStencil-Regular": 508, "RTL-Sondos-Regular": 509, "HomemadeApple-Regular": 510, "CosmicOcto-Medium": 511}
assets/multilingual_cn-en_font_idx.json
ADDED
@@ -0,0 +1 @@
{"en-Montserrat-Regular": 0, "en-Poppins-Italic": 1, "en-GlacialIndifference-Regular": 2, "en-OpenSans-ExtraBoldItalic": 3, "en-Montserrat-Bold": 4, "en-Now-Regular": 5, "en-Garet-Regular": 6, "en-LeagueSpartan-Bold": 7, "en-DMSans-Regular": 8, "en-OpenSauceOne-Regular": 9, "en-OpenSans-ExtraBold": 10, "en-KGPrimaryPenmanship": 11, "en-Anton-Regular": 12, "en-Aileron-BlackItalic": 13, "en-Quicksand-Light": 14, "en-Roboto-BoldItalic": 15, "en-TheSeasons-It": 16, "en-Kollektif": 17, "en-Inter-BoldItalic": 18, "en-Poppins-Medium": 19, "en-Poppins-Light": 20, "en-RoxboroughCF-RegularItalic": 21, "en-PlayfairDisplay-SemiBold": 22, "en-Agrandir-Italic": 23, "en-Lato-Regular": 24, "en-MoreSugarRegular": 25, "en-CanvaSans-RegularItalic": 26, "en-PublicSans-Italic": 27, "en-CodePro-NormalLC": 28, "en-Belleza-Regular": 29, "en-JosefinSans-Bold": 30, "en-HKGrotesk-Bold": 31, "en-Telegraf-Medium": 32, "en-BrittanySignatureRegular": 33, "en-Raleway-ExtraBoldItalic": 34, "en-Mont-RegularItalic": 35, "en-Arimo-BoldItalic": 36, "en-Lora-Italic": 37, "en-ArchivoBlack-Regular": 38, "en-Poppins": 39, "en-Barlow-Black": 40, "en-CormorantGaramond-Bold": 41, "en-LibreBaskerville-Regular": 42, "en-CanvaSchoolFontRegular": 43, "en-BebasNeueBold": 44, "en-LazydogRegular": 45, "en-FredokaOne-Regular": 46, "en-Horizon-Bold": 47, "en-Nourd-Regular": 48, "en-Hatton-Regular": 49, "en-Nunito-ExtraBoldItalic": 50, "en-CerebriSans-Regular": 51, "en-Montserrat-Light": 52, "en-TenorSans": 53, "en-Norwester-Regular": 54, "en-ClearSans-Bold": 55, "en-Cardo-Regular": 56, "en-Alice-Regular": 57, "en-Oswald-Regular": 58, "en-Gaegu-Bold": 59, "en-Muli-Black": 60, "en-TAN-PEARL-Regular": 61, "en-CooperHewitt-Book": 62, "en-Agrandir-Grand": 63, "en-BlackMango-Thin": 64, "en-DMSerifDisplay-Regular": 65, "en-Antonio-Bold": 66, "en-Sniglet-Regular": 67, "en-BeVietnam-Regular": 68, "en-NunitoSans10pt-BlackItalic": 69, "en-AbhayaLibre-ExtraBold": 70, "en-Rubik-Regular": 71, "en-PPNeueMachina-Regular": 72, "en-TAN - MON CHERI-Regular": 73, "en-Jua-Regular": 74, "en-Playlist-Script": 75, "en-SourceSansPro-BoldItalic": 76, "en-MoonTime-Regular": 77, "en-Eczar-ExtraBold": 78, "en-Gatwick-Regular": 79, "en-MonumentExtended-Regular": 80, "en-BarlowSemiCondensed-Regular": 81, "en-BarlowCondensed-Regular": 82, "en-Alegreya-Regular": 83, "en-DreamAvenue": 84, "en-RobotoCondensed-Italic": 85, "en-BobbyJones-Regular": 86, "en-Garet-ExtraBold": 87, "en-YesevaOne-Regular": 88, "en-Dosis-ExtraBold": 89, "en-LeagueGothic-Regular": 90, "en-OpenSans-Italic": 91, "en-TANAEGEAN-Regular": 92, "en-Maharlika-Regular": 93, "en-MarykateRegular": 94, "en-Cinzel-Regular": 95, "en-Agrandir-Wide": 96, "en-Chewy-Regular": 97, "en-BodoniFLF-BoldItalic": 98, "en-Nunito-BlackItalic": 99, "en-LilitaOne": 100, "en-HandyCasualCondensed-Regular": 101, "en-Ovo": 102, "en-Livvic-Regular": 103, "en-Agrandir-Narrow": 104, "en-CrimsonPro-Italic": 105, "en-AnonymousPro-Bold": 106, "en-NF-OneLittleFont-Bold": 107, "en-RedHatDisplay-BoldItalic": 108, "en-CodecPro-Regular": 109, "en-HalimunRegular": 110, "en-LibreFranklin-Black": 111, "en-TeXGyreTermes-BoldItalic": 112, "en-Shrikhand-Regular": 113, "en-TTNormsPro-Italic": 114, "en-Gagalin-Regular": 115, "en-OpenSans-Bold": 116, "en-GreatVibes-Regular": 117, "en-Breathing": 118, "en-HeroLight-Regular": 119, "en-KGPrimaryDots": 120, "en-Quicksand-Bold": 121, "en-Brice-ExtraLightSemiExpanded": 122, "en-Lato-BoldItalic": 123, "en-Fraunces9pt-Italic": 124, "en-AbrilFatface-Regular": 125, "en-BerkshireSwash-Regular": 126, 
"en-Atma-Bold": 127, "en-HolidayRegular": 128, "en-BebasNeueCyrillic": 129, "en-IntroRust-Base": 130, "en-Gistesy": 131, "en-BDScript-Regular": 132, "en-ApricotsRegular": 133, "en-Prompt-Black": 134, "en-TAN MERINGUE": 135, "en-Sukar Regular": 136, "en-GentySans-Regular": 137, "en-NeueEinstellung-Normal": 138, "en-Garet-Bold": 139, "en-FiraSans-Black": 140, "en-BantayogLight": 141, "en-NotoSerifDisplay-Black": 142, "en-TTChocolates-Regular": 143, "en-Ubuntu-Regular": 144, "en-Assistant-Bold": 145, "en-ABeeZee-Regular": 146, "en-LexendDeca-Regular": 147, "en-KingredSerif": 148, "en-Radley-Regular": 149, "en-BrownSugar": 150, "en-MigraItalic-ExtraboldItalic": 151, "en-ChildosArabic-Regular": 152, "en-PeaceSans": 153, "en-LondrinaSolid-Black": 154, "en-SpaceMono-BoldItalic": 155, "en-RobotoMono-Light": 156, "en-CourierPrime-Regular": 157, "en-Alata-Regular": 158, "en-Amsterdam-One": 159, "en-IreneFlorentina-Regular": 160, "en-CatchyMager": 161, "en-Alta_regular": 162, "en-ArticulatCF-Regular": 163, "en-Raleway-Regular": 164, "en-BrasikaDisplay": 165, "en-TANAngleton-Italic": 166, "en-NotoSerifDisplay-ExtraCondensedItalic": 167, "en-Bryndan Write": 168, "en-TTCommonsPro-It": 169, "en-AlexBrush-Regular": 170, "en-Antic-Regular": 171, "en-TTHoves-Bold": 172, "en-DroidSerif": 173, "en-AblationRegular": 174, "en-Marcellus-Regular": 175, "en-Sanchez-Italic": 176, "en-JosefinSans": 177, "en-Afrah-Regular": 178, "en-PinyonScript": 179, "en-TTInterphases-BoldItalic": 180, "en-Yellowtail-Regular": 181, "en-Gliker-Regular": 182, "en-BobbyJonesSoft-Regular": 183, "en-IBMPlexSans": 184, "en-Amsterdam-Three": 185, "en-Amsterdam-FourSlant": 186, "en-TTFors-Regular": 187, "en-Quattrocento": 188, "en-Sifonn-Basic": 189, "en-AlegreyaSans-Black": 190, "en-Daydream": 191, "en-AristotelicaProTx-Rg": 192, "en-NotoSerif": 193, "en-EBGaramond-Italic": 194, "en-HammersmithOne-Regular": 195, "en-RobotoSlab-Regular": 196, "en-DO-Sans-Regular": 197, "en-KGPrimaryDotsLined": 198, "en-Blinker-Regular": 199, "en-TAN NIMBUS": 200, "en-Blueberry-Regular": 201, "en-Rosario-Regular": 202, "en-Forum": 203, "en-MistrullyRegular": 204, "en-SourceSerifPro-Regular": 205, "en-Bugaki-Regular": 206, "en-CMUSerif-Roman": 207, "en-GulfsDisplay-NormalItalic": 208, "en-PTSans-Bold": 209, "en-Sensei-Medium": 210, "en-SquadaOne-Regular": 211, "en-Arapey-Italic": 212, "en-Parisienne-Regular": 213, "en-Aleo-Italic": 214, "en-QuicheDisplay-Italic": 215, "en-RocaOne-It": 216, "en-Funtastic-Regular": 217, "en-PTSerif-BoldItalic": 218, "en-Muller-RegularItalic": 219, "en-ArgentCF-Regular": 220, "en-Brightwall-Italic": 221, "en-Knewave-Regular": 222, "en-TYSerif-D": 223, "en-Agrandir-Tight": 224, "en-AlfaSlabOne-Regular": 225, "en-TANTangkiwood-Display": 226, "en-Kief-Montaser-Regular": 227, "en-Gotham-Book": 228, "en-JuliusSansOne-Regular": 229, "en-CocoGothic-Italic": 230, "en-SairaCondensed-Regular": 231, "en-DellaRespira-Regular": 232, "en-Questrial-Regular": 233, "en-BukhariScript-Regular": 234, "en-HelveticaWorld-Bold": 235, "en-TANKINDRED-Display": 236, "en-CinzelDecorative-Regular": 237, "en-Vidaloka-Regular": 238, "en-AlegreyaSansSC-Black": 239, "en-FeelingPassionate-Regular": 240, "en-QuincyCF-Regular": 241, "en-FiraCode-Regular": 242, "en-Genty-Regular": 243, "en-Nickainley-Normal": 244, "en-RubikOne-Regular": 245, "en-Gidole-Regular": 246, "en-Borsok": 247, "en-Gordita-RegularItalic": 248, "en-Scripter-Regular": 249, "en-Buffalo-Regular": 250, "en-KleinText-Regular": 251, "en-Creepster-Regular": 252, "en-Arvo-Bold": 253, 
"en-GabrielSans-NormalItalic": 254, "en-Heebo-Black": 255, "en-LexendExa-Regular": 256, "en-BrixtonSansTC-Regular": 257, "en-GildaDisplay-Regular": 258, "en-ChunkFive-Roman": 259, "en-Amaranth-BoldItalic": 260, "en-BubbleboddyNeue-Regular": 261, "en-MavenPro-Bold": 262, "en-TTDrugs-Italic": 263, "en-CyGrotesk-KeyRegular": 264, "en-VarelaRound-Regular": 265, "en-Ruda-Black": 266, "en-SafiraMarch": 267, "en-BloggerSans": 268, "en-TANHEADLINE-Regular": 269, "en-SloopScriptPro-Regular": 270, "en-NeueMontreal-Regular": 271, "en-Schoolbell-Regular": 272, "en-SigherRegular": 273, "en-InriaSerif-Regular": 274, "en-JetBrainsMono-Regular": 275, "en-MADEEvolveSans": 276, "en-Dekko": 277, "en-Handyman-Regular": 278, "en-Aileron-BoldItalic": 279, "en-Bright-Italic": 280, "en-Solway-Regular": 281, "en-Higuen-Regular": 282, "en-WedgesItalic": 283, "en-TANASHFORD-BOLD": 284, "en-IBMPlexMono": 285, "en-RacingSansOne-Regular": 286, "en-RegularBrush": 287, "en-OpenSans-LightItalic": 288, "en-SpecialElite-Regular": 289, "en-FuturaLTPro-Medium": 290, "en-MaragsaDisplay": 291, "en-BigShouldersDisplay-Regular": 292, "en-BDSans-Regular": 293, "en-RasputinRegular": 294, "en-Yvesyvesdrawing-BoldItalic": 295, "en-Bitter-Regular": 296, "en-LuckiestGuy-Regular": 297, "en-CanvaSchoolFontDotted": 298, "en-TTFirsNeue-Italic": 299, "en-Sunday-Regular": 300, "en-HKGothic-MediumItalic": 301, "en-CaveatBrush-Regular": 302, "en-HeliosExt": 303, "en-ArchitectsDaughter-Regular": 304, "en-Angelina": 305, "en-Calistoga-Regular": 306, "en-ArchivoNarrow-Regular": 307, "en-ObjectSans-MediumSlanted": 308, "en-AyrLucidityCondensed-Regular": 309, "en-Nexa-RegularItalic": 310, "en-Lustria-Regular": 311, "en-Amsterdam-TwoSlant": 312, "en-Virtual-Regular": 313, "en-Brusher-Regular": 314, "en-NF-Lepetitcochon-Regular": 315, "en-TANTWINKLE": 316, "en-LeJour-Serif": 317, "en-Prata-Regular": 318, "en-PPWoodland-Regular": 319, "en-PlayfairDisplay-BoldItalic": 320, "en-AmaticSC-Regular": 321, "en-Cabin-Regular": 322, "en-Manjari-Bold": 323, "en-MrDafoe-Regular": 324, "en-TTRamillas-Italic": 325, "en-Luckybones-Bold": 326, "en-DarkerGrotesque-Light": 327, "en-BellabooRegular": 328, "en-CormorantSC-Bold": 329, "en-GochiHand-Regular": 330, "en-Atteron": 331, "en-RocaTwo-Lt": 332, "en-ZCOOLXiaoWei-Regular": 333, "en-TANSONGBIRD": 334, "en-HeadingNow-74Regular": 335, "en-Luthier-BoldItalic": 336, "en-Oregano-Regular": 337, "en-AyrTropikaIsland-Int": 338, "en-Mali-Regular": 339, "en-DidactGothic-Regular": 340, "en-Lovelace-Regular": 341, "en-BakerieSmooth-Regular": 342, "en-CarterOne": 343, "en-HussarBd": 344, "en-OldStandard-Italic": 345, "en-TAN-ASTORIA-Display": 346, "en-rugratssans-Regular": 347, "en-BMHANNA": 348, "en-BetterSaturday": 349, "en-AdigianaToybox": 350, "en-Sailors": 351, "en-PlayfairDisplaySC-Italic": 352, "en-Etna-Regular": 353, "en-Revive80Signature": 354, "en-CAGenerated": 355, "en-Poppins-Regular": 356, "en-Jonathan-Regular": 357, "en-Pacifico-Regular": 358, "en-Saira-Black": 359, "en-Loubag-Regular": 360, "en-Decalotype-Black": 361, "en-Mansalva-Regular": 362, "en-Allura-Regular": 363, "en-ProximaNova-Bold": 364, "en-TANMIGNON-DISPLAY": 365, "en-ArsenicaAntiqua-Regular": 366, "en-BreulGroteskA-RegularItalic": 367, "en-HKModular-Bold": 368, "en-TANNightingale-Regular": 369, "en-AristotelicaProCndTxt-Rg": 370, "en-Aprila-Regular": 371, "en-Tomorrow-Regular": 372, "en-AngellaWhite": 373, "en-KaushanScript-Regular": 374, "en-NotoSans": 375, "en-LeJour-Script": 376, "en-BrixtonTC-Regular": 377, "en-OleoScript-Regular": 378, 
"en-Cakerolli-Regular": 379, "en-Lobster-Regular": 380, "en-FrunchySerif-Regular": 381, "en-PorcelainRegular": 382, "en-AlojaExtended": 383, "en-SergioTrendy-Italic": 384, "en-LovelaceText-Bold": 385, "en-Anaktoria": 386, "en-JimmyScript-Light": 387, "en-IBMPlexSerif": 388, "en-Marta": 389, "en-Mango-Regular": 390, "en-Overpass-Italic": 391, "en-Hagrid-Regular": 392, "en-ElikaGorica": 393, "en-Amiko-Regular": 394, "en-EFCOBrookshire-Regular": 395, "en-Caladea-Regular": 396, "en-MoonlightBold": 397, "en-Staatliches-Regular": 398, "en-Helios-Bold": 399, "en-Satisfy-Regular": 400, "en-NexaScript-Regular": 401, "en-Trocchi-Regular": 402, "en-March": 403, "en-IbarraRealNova-Regular": 404, "en-Nectarine-Regular": 405, "en-Overpass-Light": 406, "en-TruetypewriterPolyglOTT": 407, "en-Bangers-Regular": 408, "en-Lazord-BoldExpandedItalic": 409, "en-Chloe-Regular": 410, "en-BaskervilleDisplayPT-Regular": 411, "en-Bright-Regular": 412, "en-Vollkorn-Regular": 413, "en-Harmattan": 414, "en-SortsMillGoudy-Regular": 415, "en-Biryani-Bold": 416, "en-SugoProDisplay-Italic": 417, "en-Lazord-BoldItalic": 418, "en-Alike-Regular": 419, "en-PermanentMarker-Regular": 420, "en-Sacramento-Regular": 421, "en-HKGroteskPro-Italic": 422, "en-Aleo-BoldItalic": 423, "en-Noot": 424, "en-TANGARLAND-Regular": 425, "en-Twister": 426, "en-Arsenal-Italic": 427, "en-Bogart-Italic": 428, "en-BethEllen-Regular": 429, "en-Caveat-Regular": 430, "en-BalsamiqSans-Bold": 431, "en-BreeSerif-Regular": 432, "en-CodecPro-ExtraBold": 433, "en-Pierson-Light": 434, "en-CyGrotesk-WideRegular": 435, "en-Lumios-Marker": 436, "en-Comfortaa-Bold": 437, "en-TraceFontRegular": 438, "en-RTL-AdamScript-Regular": 439, "en-EastmanGrotesque-Italic": 440, "en-Kalam-Bold": 441, "en-ChauPhilomeneOne-Regular": 442, "en-Coiny-Regular": 443, "en-Lovera": 444, "en-Gellatio": 445, "en-TitilliumWeb-Bold": 446, "en-OilvareBase-Italic": 447, "en-Catamaran-Black": 448, "en-Anteb-Italic": 449, "en-SueEllenFrancisco": 450, "en-SweetApricot": 451, "en-BrightSunshine": 452, "en-IM_FELL_Double_Pica_Italic": 453, "en-Granaina-limpia": 454, "en-TANPARFAIT": 455, "en-AcherusGrotesque-Regular": 456, "en-AwesomeLathusca-Italic": 457, "en-Signika-Bold": 458, "en-Andasia": 459, "en-DO-AllCaps-Slanted": 460, "en-Zenaida-Regular": 461, "en-Fahkwang-Regular": 462, "en-Play-Regular": 463, "en-BERNIERRegular-Regular": 464, "en-PlumaThin-Regular": 465, "en-SportsWorld": 466, "en-Garet-Black": 467, "en-CarolloPlayscript-BlackItalic": 468, "en-Cheque-Regular": 469, "en-SEGO": 470, "en-BobbyJones-Condensed": 471, "en-NexaSlab-RegularItalic": 472, "en-DancingScript-Regular": 473, "en-PaalalabasDisplayWideBETA": 474, "en-Magnolia-Script": 475, "en-OpunMai-400It": 476, "en-MadelynFill-Regular": 477, "en-ZingRust-Base": 478, "en-FingerPaint-Regular": 479, "en-BostonAngel-Light": 480, "en-Gliker-RegularExpanded": 481, "en-Ahsing": 482, "en-Engagement-Regular": 483, "en-EyesomeScript": 484, "en-LibraSerifModern-Regular": 485, "en-London-Regular": 486, "en-AtkinsonHyperlegible-Regular": 487, "en-StadioNow-TextItalic": 488, "en-Aniyah": 489, "en-ITCAvantGardePro-Bold": 490, "en-Comica-Regular": 491, "en-Coustard-Regular": 492, "en-Brice-BoldCondensed": 493, "en-TANNEWYORK-Bold": 494, "en-TANBUSTER-Bold": 495, "en-Alatsi-Regular": 496, "en-TYSerif-Book": 497, "en-Jingleberry": 498, "en-Rajdhani-Bold": 499, "en-LobsterTwo-BoldItalic": 500, "en-BestLight-Medium": 501, "en-Hitchcut-Regular": 502, "en-GermaniaOne-Regular": 503, "en-Emitha-Script": 504, "en-LemonTuesday": 505, 
"en-Cubao_Free_Regular": 506, "en-MonterchiSerif-Regular": 507, "en-AllertaStencil-Regular": 508, "en-RTL-Sondos-Regular": 509, "en-HomemadeApple-Regular": 510, "en-CosmicOcto-Medium": 511, "cn-HelloFont-FangHuaTi": 0, "cn-HelloFont-ID-DianFangSong-Bold": 1, "cn-HelloFont-ID-DianFangSong": 2, "cn-HelloFont-ID-DianHei-CEJ": 3, "cn-HelloFont-ID-DianHei-DEJ": 4, "cn-HelloFont-ID-DianHei-EEJ": 5, "cn-HelloFont-ID-DianHei-FEJ": 6, "cn-HelloFont-ID-DianHei-GEJ": 7, "cn-HelloFont-ID-DianKai-Bold": 8, "cn-HelloFont-ID-DianKai": 9, "cn-HelloFont-WenYiHei": 10, "cn-Hellofont-ID-ChenYanXingKai": 11, "cn-Hellofont-ID-DaZiBao": 12, "cn-Hellofont-ID-DaoCaoRen": 13, "cn-Hellofont-ID-JianSong": 14, "cn-Hellofont-ID-JiangHuZhaoPaiHei": 15, "cn-Hellofont-ID-KeSong": 16, "cn-Hellofont-ID-LeYuanTi": 17, "cn-Hellofont-ID-Pinocchio": 18, "cn-Hellofont-ID-QiMiaoTi": 19, "cn-Hellofont-ID-QingHuaKai": 20, "cn-Hellofont-ID-QingHuaXingKai": 21, "cn-Hellofont-ID-ShanShuiXingKai": 22, "cn-Hellofont-ID-ShouXieQiShu": 23, "cn-Hellofont-ID-ShouXieTongZhenTi": 24, "cn-Hellofont-ID-TengLingTi": 25, "cn-Hellofont-ID-XiaoLiShu": 26, "cn-Hellofont-ID-XuanZhenSong": 27, "cn-Hellofont-ID-ZhongLingXingKai": 28, "cn-HellofontIDJiaoTangTi": 29, "cn-HellofontIDJiuZhuTi": 30, "cn-HuXiaoBao-SaoBao": 31, "cn-HuXiaoBo-NanShen": 32, "cn-HuXiaoBo-ZhenShuai": 33, "cn-SourceHanSansSC-Bold": 34, "cn-SourceHanSansSC-ExtraLight": 35, "cn-SourceHanSansSC-Heavy": 36, "cn-SourceHanSansSC-Light": 37, "cn-SourceHanSansSC-Medium": 38, "cn-SourceHanSansSC-Normal": 39, "cn-SourceHanSansSC-Regular": 40, "cn-SourceHanSerifSC-Bold": 41, "cn-SourceHanSerifSC-ExtraLight": 42, "cn-SourceHanSerifSC-Heavy": 43, "cn-SourceHanSerifSC-Light": 44, "cn-SourceHanSerifSC-Medium": 45, "cn-SourceHanSerifSC-Regular": 46, "cn-SourceHanSerifSC-SemiBold": 47, "cn-xiaowei": 48, "cn-AaJianHaoTi": 49, "cn-AlibabaPuHuiTi-Bold": 50, "cn-AlibabaPuHuiTi-Heavy": 51, "cn-AlibabaPuHuiTi-Light": 52, "cn-AlibabaPuHuiTi-Medium": 53, "cn-AlibabaPuHuiTi-Regular": 54, "cn-CanvaAcidBoldSC": 55, "cn-CanvaBreezeCN": 56, "cn-CanvaBumperCropSC": 57, "cn-CanvaCakeShopCN": 58, "cn-CanvaEndeavorBlackSC": 59, "cn-CanvaJoyHeiCN": 60, "cn-CanvaLiCN": 61, "cn-CanvaOrientalBrushCN": 62, "cn-CanvaPoster": 63, "cn-CanvaQinfuCalligraphyCN": 64, "cn-CanvaSweetHeartCN": 65, "cn-CanvaSwordLikeDreamCN": 66, "cn-CanvaTangyuanHandwritingCN": 67, "cn-CanvaWanderWorldCN": 68, "cn-CanvaWenCN": 69, "cn-DianZiChunYi": 70, "cn-GenSekiGothicTW-H": 71, "cn-GenWanMinTW-L": 72, "cn-GenYoMinTW-B": 73, "cn-GenYoMinTW-EL": 74, "cn-GenYoMinTW-H": 75, "cn-GenYoMinTW-M": 76, "cn-GenYoMinTW-R": 77, "cn-GenYoMinTW-SB": 78, "cn-HYQiHei-AZEJ": 79, "cn-HYQiHei-EES": 80, "cn-HanaMinA": 81, "cn-HappyZcool-2016": 82, "cn-HelloFont ZJ KeKouKeAiTi": 83, "cn-HelloFont-ID-BoBoTi": 84, "cn-HelloFont-ID-FuGuHei-25": 85, "cn-HelloFont-ID-FuGuHei-35": 86, "cn-HelloFont-ID-FuGuHei-45": 87, "cn-HelloFont-ID-FuGuHei-55": 88, "cn-HelloFont-ID-FuGuHei-65": 89, "cn-HelloFont-ID-FuGuHei-75": 90, "cn-HelloFont-ID-FuGuHei-85": 91, "cn-HelloFont-ID-HeiKa": 92, "cn-HelloFont-ID-HeiTang": 93, "cn-HelloFont-ID-JianSong-95": 94, "cn-HelloFont-ID-JueJiangHei-50": 95, "cn-HelloFont-ID-JueJiangHei-55": 96, "cn-HelloFont-ID-JueJiangHei-60": 97, "cn-HelloFont-ID-JueJiangHei-65": 98, "cn-HelloFont-ID-JueJiangHei-70": 99, "cn-HelloFont-ID-JueJiangHei-75": 100, "cn-HelloFont-ID-JueJiangHei-80": 101, "cn-HelloFont-ID-KuHeiTi": 102, "cn-HelloFont-ID-LingDongTi": 103, "cn-HelloFont-ID-LingLiTi": 104, "cn-HelloFont-ID-MuFengTi": 105, 
"cn-HelloFont-ID-NaiNaiJiangTi": 106, "cn-HelloFont-ID-PangDu": 107, "cn-HelloFont-ID-ReLieTi": 108, "cn-HelloFont-ID-RouRun": 109, "cn-HelloFont-ID-SaShuangShouXieTi": 110, "cn-HelloFont-ID-WangZheFengFan": 111, "cn-HelloFont-ID-YouQiTi": 112, "cn-Hellofont-ID-XiaLeTi": 113, "cn-Hellofont-ID-XianXiaTi": 114, "cn-HuXiaoBoKuHei": 115, "cn-IDDanMoXingKai": 116, "cn-IDJueJiangHei": 117, "cn-IDMeiLingTi": 118, "cn-IDQQSugar": 119, "cn-LiuJianMaoCao-Regular": 120, "cn-LongCang-Regular": 121, "cn-MaShanZheng-Regular": 122, "cn-PangMenZhengDao-3": 123, "cn-PangMenZhengDao-Cu": 124, "cn-PangMenZhengDao": 125, "cn-SentyCaramel": 126, "cn-SourceHanSerifSC": 127, "cn-WenCang-Regular": 128, "cn-WenQuanYiMicroHei": 129, "cn-XianErTi": 130, "cn-YRDZSTJF": 131, "cn-YS-HelloFont-BangBangTi": 132, "cn-ZCOOLKuaiLe-Regular": 133, "cn-ZCOOLQingKeHuangYou-Regular": 134, "cn-ZCOOLXiaoWei-Regular": 135, "cn-ZCOOL_KuHei": 136, "cn-ZhiMangXing-Regular": 137, "cn-baotuxiaobaiti": 138, "cn-jiangxizhuokai-Regular": 139, "cn-zcool-gdh": 140, "cn-zcoolqingkehuangyouti-Regular": 141, "cn-zcoolwenyiti": 142}
checkpoints/glyph-sdxl/byt5_mapper.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6d8e8c5ac933bc21e80287d2c96aa64f6e03a1936094a4dc8906ab78ecb61063
size 301553807
checkpoints/glyph-sdxl/byt5_model.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:edea29b75df65cf6ed3a8c79341292b962eed5c1dc0c111b7dc10e0817d5341c
size 874506157
checkpoints/glyph-sdxl/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:066c9516b1b436ce2ce2aa052fdc272a6daa09e857d876ddf956229df62dbd1e
size 3839437754
checkpoints/glyph-sdxl/scaler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b76dcf1db79cb067cf7fa4cbecbf2df9c18cc4780e14c75a5081dffd64221c95
size 988
checkpoints/glyph-sdxl/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4aac7effc1e494aeaf74df69283e6c01de6153aa5857e62e82aeb7c9d24c23df
size 1064
checkpoints/glyph-sdxl/unet_inserted_attn.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5b6af4376281be262f3b52ca0b16b0244099161693f65a7db352f53878481767
size 908
checkpoints/glyph-sdxl/unet_lora.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9ac9ec31fbe654b5822caa6a83b1c83c4a09e536a0cd0b23fa5985824260662c
size 743590514
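
Note that every checkpoint above is stored as a Git LFS pointer (version, oid, size) rather than the binary itself; `git lfs pull` materializes the real files. A minimal sketch, assuming the blobs have been fetched, for verifying one of them against its recorded digest:

# Sketch: check a fetched checkpoint against the sha256 oid in its pointer.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(chunk_size), b''):
            h.update(block)
    return h.hexdigest()

expected = '9ac9ec31fbe654b5822caa6a83b1c83c4a09e536a0cd0b23fa5985824260662c'
assert sha256_of('checkpoints/glyph-sdxl/unet_lora.pt') == expected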
configs/glyph_multilingual_sdxl_albedo.py
ADDED
@@ -0,0 +1,96 @@
#### Model Setting
pretrained_model_name_or_path = 'stablediffusionapi/albedobase-xl-20'
pretrained_vae_model_name_or_path = 'madebyollin/sdxl-vae-fp16-fix'
revision = None

byt5_max_length = 512
byt5_mapper_type = 'T5EncoderBlockByT5Mapper'
byt5_mapper_config = dict(
    num_layers=4,
    sdxl_channels=2048,
)
byt5_config = dict(
    byt5_name='google/byt5-small',
    special_token=True,
    color_special_token=True,
    font_special_token=True,
    color_ann_path='assets/color_idx.json',
    font_ann_path='assets/multilingual_cn-en_font_idx.json',
    multilingual=True,
)

attn_block_to_modify = [
    "down_blocks.1.attentions.0.transformer_blocks.0",
    "down_blocks.1.attentions.0.transformer_blocks.1",
    "down_blocks.1.attentions.1.transformer_blocks.0",
    "down_blocks.1.attentions.1.transformer_blocks.1",
    "down_blocks.2.attentions.0.transformer_blocks.0",
    "down_blocks.2.attentions.0.transformer_blocks.1",
    "down_blocks.2.attentions.0.transformer_blocks.2",
    "down_blocks.2.attentions.0.transformer_blocks.3",
    "down_blocks.2.attentions.0.transformer_blocks.4",
    "down_blocks.2.attentions.0.transformer_blocks.5",
    "down_blocks.2.attentions.0.transformer_blocks.6",
    "down_blocks.2.attentions.0.transformer_blocks.7",
    "down_blocks.2.attentions.0.transformer_blocks.8",
    "down_blocks.2.attentions.0.transformer_blocks.9",
    "down_blocks.2.attentions.1.transformer_blocks.0",
    "down_blocks.2.attentions.1.transformer_blocks.1",
    "down_blocks.2.attentions.1.transformer_blocks.2",
    "down_blocks.2.attentions.1.transformer_blocks.3",
    "down_blocks.2.attentions.1.transformer_blocks.4",
    "down_blocks.2.attentions.1.transformer_blocks.5",
    "down_blocks.2.attentions.1.transformer_blocks.6",
    "down_blocks.2.attentions.1.transformer_blocks.7",
    "down_blocks.2.attentions.1.transformer_blocks.8",
    "down_blocks.2.attentions.1.transformer_blocks.9",
    "up_blocks.0.attentions.0.transformer_blocks.0",
    "up_blocks.0.attentions.0.transformer_blocks.1",
    "up_blocks.0.attentions.0.transformer_blocks.2",
    "up_blocks.0.attentions.0.transformer_blocks.3",
    "up_blocks.0.attentions.0.transformer_blocks.4",
    "up_blocks.0.attentions.0.transformer_blocks.5",
    "up_blocks.0.attentions.0.transformer_blocks.6",
    "up_blocks.0.attentions.0.transformer_blocks.7",
    "up_blocks.0.attentions.0.transformer_blocks.8",
    "up_blocks.0.attentions.0.transformer_blocks.9",
    "up_blocks.0.attentions.1.transformer_blocks.0",
    "up_blocks.0.attentions.1.transformer_blocks.1",
    "up_blocks.0.attentions.1.transformer_blocks.2",
    "up_blocks.0.attentions.1.transformer_blocks.3",
    "up_blocks.0.attentions.1.transformer_blocks.4",
    "up_blocks.0.attentions.1.transformer_blocks.5",
    "up_blocks.0.attentions.1.transformer_blocks.6",
    "up_blocks.0.attentions.1.transformer_blocks.7",
    "up_blocks.0.attentions.1.transformer_blocks.8",
    "up_blocks.0.attentions.1.transformer_blocks.9",
    "up_blocks.0.attentions.2.transformer_blocks.0",
    "up_blocks.0.attentions.2.transformer_blocks.1",
    "up_blocks.0.attentions.2.transformer_blocks.2",
    "up_blocks.0.attentions.2.transformer_blocks.3",
    "up_blocks.0.attentions.2.transformer_blocks.4",
    "up_blocks.0.attentions.2.transformer_blocks.5",
    "up_blocks.0.attentions.2.transformer_blocks.6",
    "up_blocks.0.attentions.2.transformer_blocks.7",
    "up_blocks.0.attentions.2.transformer_blocks.8",
    "up_blocks.0.attentions.2.transformer_blocks.9",
    "up_blocks.1.attentions.0.transformer_blocks.0",
    "up_blocks.1.attentions.0.transformer_blocks.1",
    "up_blocks.1.attentions.1.transformer_blocks.0",
    "up_blocks.1.attentions.1.transformer_blocks.1",
    "up_blocks.1.attentions.2.transformer_blocks.0",
    "up_blocks.1.attentions.2.transformer_blocks.1",
    "mid_block.attentions.0.transformer_blocks.0",
    "mid_block.attentions.0.transformer_blocks.1",
    "mid_block.attentions.0.transformer_blocks.2",
    "mid_block.attentions.0.transformer_blocks.3",
    "mid_block.attentions.0.transformer_blocks.4",
    "mid_block.attentions.0.transformer_blocks.5",
    "mid_block.attentions.0.transformer_blocks.6",
    "mid_block.attentions.0.transformer_blocks.7",
    "mid_block.attentions.0.transformer_blocks.8",
    "mid_block.attentions.0.transformer_blocks.9",
]

unet_lora_rank = 128
inference_dtype = 'fp16'
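
Because the config is plain Python, it can be loaded as a module and read attribute by attribute; a minimal sketch, assuming direct importlib loading (the repo ships its own loader in glyph_sdxl/utils/parse_config.py, whose API is not shown in this commit excerpt):

# Sketch: load the config above and inspect a few fields.
import importlib.util

spec = importlib.util.spec_from_file_location(
    'glyph_cfg', 'configs/glyph_multilingual_sdxl_albedo.py')
cfg = importlib.util.module_from_spec(spec)
spec.loader.exec_module(cfg)

print(cfg.byt5_config['font_ann_path'])  # assets/multilingual_cn-en_font_idx.json
print(len(cfg.attn_block_to_modify))     # 70 blocks receive glyph cross-attention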
configs/glyph_sdxl.py
ADDED
@@ -0,0 +1,96 @@
#### Model Setting
pretrained_model_name_or_path = 'stabilityai/stable-diffusion-xl-base-1.0'
pretrained_vae_model_name_or_path = 'madebyollin/sdxl-vae-fp16-fix'
revision = None

byt5_max_length = 512
byt5_mapper_type = 'T5EncoderBlockByT5Mapper'
byt5_mapper_config = dict(
    num_layers=4,
    sdxl_channels=2048,
)
byt5_config = dict(
    byt5_name='google/byt5-small',
    special_token=True,
    color_special_token=True,
    font_special_token=True,
    color_ann_path='assets/color_idx.json',
    font_ann_path='assets/font_idx_512.json',
    multilingual=False,
)

attn_block_to_modify = [
    "down_blocks.1.attentions.0.transformer_blocks.0",
    "down_blocks.1.attentions.0.transformer_blocks.1",
    "down_blocks.1.attentions.1.transformer_blocks.0",
    "down_blocks.1.attentions.1.transformer_blocks.1",
    "down_blocks.2.attentions.0.transformer_blocks.0",
    "down_blocks.2.attentions.0.transformer_blocks.1",
    "down_blocks.2.attentions.0.transformer_blocks.2",
    "down_blocks.2.attentions.0.transformer_blocks.3",
    "down_blocks.2.attentions.0.transformer_blocks.4",
    "down_blocks.2.attentions.0.transformer_blocks.5",
    "down_blocks.2.attentions.0.transformer_blocks.6",
    "down_blocks.2.attentions.0.transformer_blocks.7",
    "down_blocks.2.attentions.0.transformer_blocks.8",
    "down_blocks.2.attentions.0.transformer_blocks.9",
    "down_blocks.2.attentions.1.transformer_blocks.0",
    "down_blocks.2.attentions.1.transformer_blocks.1",
    "down_blocks.2.attentions.1.transformer_blocks.2",
    "down_blocks.2.attentions.1.transformer_blocks.3",
    "down_blocks.2.attentions.1.transformer_blocks.4",
    "down_blocks.2.attentions.1.transformer_blocks.5",
    "down_blocks.2.attentions.1.transformer_blocks.6",
    "down_blocks.2.attentions.1.transformer_blocks.7",
    "down_blocks.2.attentions.1.transformer_blocks.8",
    "down_blocks.2.attentions.1.transformer_blocks.9",
    "up_blocks.0.attentions.0.transformer_blocks.0",
    "up_blocks.0.attentions.0.transformer_blocks.1",
    "up_blocks.0.attentions.0.transformer_blocks.2",
    "up_blocks.0.attentions.0.transformer_blocks.3",
    "up_blocks.0.attentions.0.transformer_blocks.4",
    "up_blocks.0.attentions.0.transformer_blocks.5",
    "up_blocks.0.attentions.0.transformer_blocks.6",
    "up_blocks.0.attentions.0.transformer_blocks.7",
    "up_blocks.0.attentions.0.transformer_blocks.8",
    "up_blocks.0.attentions.0.transformer_blocks.9",
    "up_blocks.0.attentions.1.transformer_blocks.0",
    "up_blocks.0.attentions.1.transformer_blocks.1",
    "up_blocks.0.attentions.1.transformer_blocks.2",
    "up_blocks.0.attentions.1.transformer_blocks.3",
    "up_blocks.0.attentions.1.transformer_blocks.4",
    "up_blocks.0.attentions.1.transformer_blocks.5",
    "up_blocks.0.attentions.1.transformer_blocks.6",
    "up_blocks.0.attentions.1.transformer_blocks.7",
    "up_blocks.0.attentions.1.transformer_blocks.8",
    "up_blocks.0.attentions.1.transformer_blocks.9",
    "up_blocks.0.attentions.2.transformer_blocks.0",
    "up_blocks.0.attentions.2.transformer_blocks.1",
    "up_blocks.0.attentions.2.transformer_blocks.2",
    "up_blocks.0.attentions.2.transformer_blocks.3",
    "up_blocks.0.attentions.2.transformer_blocks.4",
    "up_blocks.0.attentions.2.transformer_blocks.5",
    "up_blocks.0.attentions.2.transformer_blocks.6",
    "up_blocks.0.attentions.2.transformer_blocks.7",
    "up_blocks.0.attentions.2.transformer_blocks.8",
    "up_blocks.0.attentions.2.transformer_blocks.9",
    "up_blocks.1.attentions.0.transformer_blocks.0",
    "up_blocks.1.attentions.0.transformer_blocks.1",
    "up_blocks.1.attentions.1.transformer_blocks.0",
    "up_blocks.1.attentions.1.transformer_blocks.1",
    "up_blocks.1.attentions.2.transformer_blocks.0",
    "up_blocks.1.attentions.2.transformer_blocks.1",
    "mid_block.attentions.0.transformer_blocks.0",
    "mid_block.attentions.0.transformer_blocks.1",
    "mid_block.attentions.0.transformer_blocks.2",
    "mid_block.attentions.0.transformer_blocks.3",
    "mid_block.attentions.0.transformer_blocks.4",
    "mid_block.attentions.0.transformer_blocks.5",
    "mid_block.attentions.0.transformer_blocks.6",
    "mid_block.attentions.0.transformer_blocks.7",
    "mid_block.attentions.0.transformer_blocks.8",
    "mid_block.attentions.0.transformer_blocks.9",
]

unet_lora_rank = 128
inference_dtype = 'fp16'
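
The dotted names in attn_block_to_modify address BasicTransformerBlock submodules inside the SDXL UNet. A sketch (not the repo's insertion code) that resolves one of them with the standard torch.nn.Module.get_submodule API:

# Sketch: confirm an attn_block_to_modify path on a stock SDXL UNet.
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0', subfolder='unet')
block = unet.get_submodule('mid_block.attentions.0.transformer_blocks.0')
print(type(block).__name__)  # BasicTransformerBlock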
configs/glyph_sdxl_albedo.py
ADDED
@@ -0,0 +1,96 @@
#### Model Setting
pretrained_model_name_or_path = 'stablediffusionapi/albedobase-xl-20'
pretrained_vae_model_name_or_path = 'madebyollin/sdxl-vae-fp16-fix'
revision = None

byt5_max_length = 512
byt5_mapper_type = 'T5EncoderBlockByT5Mapper'
byt5_mapper_config = dict(
    num_layers=4,
    sdxl_channels=2048,
)
byt5_config = dict(
    byt5_name='google/byt5-small',
    special_token=True,
    color_special_token=True,
    font_special_token=True,
    color_ann_path='assets/color_idx.json',
    font_ann_path='assets/font_idx_512.json',
    multilingual=False,
)

attn_block_to_modify = [
    "down_blocks.1.attentions.0.transformer_blocks.0",
    "down_blocks.1.attentions.0.transformer_blocks.1",
    "down_blocks.1.attentions.1.transformer_blocks.0",
    "down_blocks.1.attentions.1.transformer_blocks.1",
    "down_blocks.2.attentions.0.transformer_blocks.0",
    "down_blocks.2.attentions.0.transformer_blocks.1",
    "down_blocks.2.attentions.0.transformer_blocks.2",
    "down_blocks.2.attentions.0.transformer_blocks.3",
    "down_blocks.2.attentions.0.transformer_blocks.4",
    "down_blocks.2.attentions.0.transformer_blocks.5",
    "down_blocks.2.attentions.0.transformer_blocks.6",
    "down_blocks.2.attentions.0.transformer_blocks.7",
    "down_blocks.2.attentions.0.transformer_blocks.8",
    "down_blocks.2.attentions.0.transformer_blocks.9",
    "down_blocks.2.attentions.1.transformer_blocks.0",
    "down_blocks.2.attentions.1.transformer_blocks.1",
    "down_blocks.2.attentions.1.transformer_blocks.2",
    "down_blocks.2.attentions.1.transformer_blocks.3",
    "down_blocks.2.attentions.1.transformer_blocks.4",
    "down_blocks.2.attentions.1.transformer_blocks.5",
    "down_blocks.2.attentions.1.transformer_blocks.6",
    "down_blocks.2.attentions.1.transformer_blocks.7",
    "down_blocks.2.attentions.1.transformer_blocks.8",
    "down_blocks.2.attentions.1.transformer_blocks.9",
    "up_blocks.0.attentions.0.transformer_blocks.0",
    "up_blocks.0.attentions.0.transformer_blocks.1",
    "up_blocks.0.attentions.0.transformer_blocks.2",
    "up_blocks.0.attentions.0.transformer_blocks.3",
    "up_blocks.0.attentions.0.transformer_blocks.4",
    "up_blocks.0.attentions.0.transformer_blocks.5",
    "up_blocks.0.attentions.0.transformer_blocks.6",
    "up_blocks.0.attentions.0.transformer_blocks.7",
    "up_blocks.0.attentions.0.transformer_blocks.8",
    "up_blocks.0.attentions.0.transformer_blocks.9",
    "up_blocks.0.attentions.1.transformer_blocks.0",
    "up_blocks.0.attentions.1.transformer_blocks.1",
    "up_blocks.0.attentions.1.transformer_blocks.2",
    "up_blocks.0.attentions.1.transformer_blocks.3",
    "up_blocks.0.attentions.1.transformer_blocks.4",
    "up_blocks.0.attentions.1.transformer_blocks.5",
    "up_blocks.0.attentions.1.transformer_blocks.6",
    "up_blocks.0.attentions.1.transformer_blocks.7",
    "up_blocks.0.attentions.1.transformer_blocks.8",
    "up_blocks.0.attentions.1.transformer_blocks.9",
    "up_blocks.0.attentions.2.transformer_blocks.0",
    "up_blocks.0.attentions.2.transformer_blocks.1",
    "up_blocks.0.attentions.2.transformer_blocks.2",
    "up_blocks.0.attentions.2.transformer_blocks.3",
    "up_blocks.0.attentions.2.transformer_blocks.4",
    "up_blocks.0.attentions.2.transformer_blocks.5",
    "up_blocks.0.attentions.2.transformer_blocks.6",
    "up_blocks.0.attentions.2.transformer_blocks.7",
    "up_blocks.0.attentions.2.transformer_blocks.8",
    "up_blocks.0.attentions.2.transformer_blocks.9",
    "up_blocks.1.attentions.0.transformer_blocks.0",
    "up_blocks.1.attentions.0.transformer_blocks.1",
    "up_blocks.1.attentions.1.transformer_blocks.0",
    "up_blocks.1.attentions.1.transformer_blocks.1",
    "up_blocks.1.attentions.2.transformer_blocks.0",
    "up_blocks.1.attentions.2.transformer_blocks.1",
    "mid_block.attentions.0.transformer_blocks.0",
    "mid_block.attentions.0.transformer_blocks.1",
    "mid_block.attentions.0.transformer_blocks.2",
    "mid_block.attentions.0.transformer_blocks.3",
    "mid_block.attentions.0.transformer_blocks.4",
    "mid_block.attentions.0.transformer_blocks.5",
    "mid_block.attentions.0.transformer_blocks.6",
    "mid_block.attentions.0.transformer_blocks.7",
    "mid_block.attentions.0.transformer_blocks.8",
    "mid_block.attentions.0.transformer_blocks.9",
]

unet_lora_rank = 128
inference_dtype = 'fp16'
demo/constants.py
ADDED
@@ -0,0 +1,2 @@
MAX_TEXT_BOX = 20
MAX_PROMPT_LENGTH = 512
examples/easter.json
ADDED
@@ -0,0 +1,43 @@
{
    "texts": [
        "MAY ALLYOUR PRAYERS BE ANSWERED",
        "HAVE A HAPPY",
        "Easter Day"
    ],
    "styles": [
        {
            "color": "#5a741c",
            "font-family": "Gagalin-Regular"
        },
        {
            "color": "#5a741c",
            "font-family": "Gagalin-Regular"
        },
        {
            "color": "#5a741c",
            "font-family": "Brusher-Regular"
        }
    ],
    "bbox": [
        [
            0.08267477203647416,
            0.5355623100303951,
            0.42857142857142855,
            0.07477203647416414
        ],
        [
            0.08389057750759879,
            0.1951367781155015,
            0.38054711246200607,
            0.03768996960486322
        ],
        [
            0.07537993920972644,
            0.2601823708206687,
            0.49544072948328266,
            0.14650455927051673
        ]
    ],
    "bg_prompt": "Facebook Post. The image features a small bunny rabbit sitting in a basket filled with various flowers. The basket is placed on a yellow background, creating a vibrant and cheerful scene. The flowers surrounding the rabbit come in different sizes and colors, adding to the overall visual appeal of the image. The rabbit appears to be the main focus of the scene, and its presence among the flowers creates a sense of harmony and balance. Tags: green, yellow, minimalist, easter day, happy easter day, easter, happy easter, decoration, happy, egg, spring, selebration, poster, illustration, greeting, season, design, colorful, cute, template",
    "seed": 1
}
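
Each example pairs texts[i] with styles[i] and bbox[i]. The bbox values all lie in [0, 1] and appear to be [left, top, width, height] fractions of the canvas (an assumption, not stated in the commit); a quick sketch to visualize the layout:

# Sketch: draw the (assumed) normalized text boxes from easter.json.
import json
from PIL import Image, ImageDraw

with open('examples/easter.json') as f:
    example = json.load(f)

W = H = 1024  # canvas size chosen only for this visualization
canvas = Image.new('RGB', (W, H), 'white')
draw = ImageDraw.Draw(canvas)
for (x, y, w, h), style in zip(example['bbox'], example['styles']):
    draw.rectangle([x * W, y * H, (x + w) * W, (y + h) * H],
                   outline=style['color'], width=3)
canvas.save('easter_layout.png')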
examples/easter.png
ADDED
examples/new_year.json
ADDED
@@ -0,0 +1,54 @@
{
    "texts": [
        "Happy New Year",
        "2024",
        "All THE BEST",
        "A fresh start to start a change for the better."
    ],
    "styles": [
        {
            "color": "#7b1f7b",
            "font-family": "Caveat-Regular"
        },
        {
            "color": "#1d1d67",
            "font-family": "Gagalin-Regular"
        },
        {
            "color": "#060606",
            "font-family": "Quicksand-Light"
        },
        {
            "color": "#060606",
            "font-family": "Quicksand-Light"
        }
    ],
    "bbox": [
        [
            0.2936170212765957,
            0.2887537993920973,
            0.40303951367781155,
            0.07173252279635259
        ],
        [
            0.24984802431610942,
            0.3951367781155015,
            0.46200607902735563,
            0.17203647416413373
        ],
        [
            0.3951367781155015,
            0.1094224924012158,
            0.2109422492401216,
            0.02796352583586626
        ],
        [
            0.20911854103343466,
            0.6127659574468085,
            0.5586626139817629,
            0.08085106382978724
        ]
    ],
    "bg_prompt": "Instagram Posts. The image features a white background with a variety of colorful flowers and decorations. There are several pink flowers scattered throughout the scene, with some positioned closer to the top and others near the bottom. A blue flower can also be seen in the middle of the image. The overall composition creates a visually appealing and vibrant display. Tags: grey, navy, purple, pink, teal, colorful, illustration, happy, celebration, post, party, year, new, event, celebrate, happy new year, new year, countdown, sparkle, firework",
    "seed": 1
}
examples/new_year.png
ADDED
examples/pancake.json
ADDED
@@ -0,0 +1,67 @@
{
    "texts": [
        "Get 75% Discount for your first order",
        "Order Now",
        "National Pancake Day"
    ],
    "styles": [
        {
            "color": "#545454",
            "font-family": "MoreSugarRegular"
        },
        {
            "color": "#ffffff",
            "font-family": "Chewy-Regular"
        },
        {
            "color": "#593535",
            "font-family": "Chewy-Regular"
        }
    ],
    "bbox": [
        [
            0.043161094224924014,
            0.5963525835866261,
            0.2936170212765957,
            0.08389057750759879
        ],
        [
            0.12279635258358662,
            0.79209726443769,
            0.26382978723404255,
            0.05167173252279635
        ],
        [
            0.044984802431610946,
            0.09787234042553192,
            0.4413373860182371,
            0.4158054711246201
        ]
    ],
    "_id": "EAFKNkLcNfU-1-0-web-2-N41D3IbRZcs",
    "category": "Instagram Posts",
    "tags": [
        "brown",
        "peach",
        "grey",
        "modern",
        "minimalist",
        "simple",
        "colorful",
        "illustration",
        "Instagram post",
        "instagram",
        "post",
        "national pancake day",
        "international pancake day",
        "happy pancake day",
        "pancake day",
        "pancake",
        "sweet",
        "cake",
        "discount",
        "sale"
    ],
    "bg_prompt": "Instagram Posts. The image features a stack of pancakes with syrup and strawberries on top. The pancakes are arranged in a visually appealing manner, with some pancakes placed on top of each other. The syrup is drizzled generously over the pancakes, and the strawberries are scattered around, adding a touch of color and freshness to the scene. The overall presentation of the pancakes is appetizing and inviting. Tags: brown, peach, grey, modern, minimalist, simple, colorful, illustration, Instagram post, instagram, post, national pancake day, international pancake day, happy pancake day, pancake day, pancake, sweet, cake, discount, sale",
    "seed": 1
}
examples/pancake.png
ADDED
examples/shower.json
ADDED
@@ -0,0 +1,76 @@
{
    "texts": [
        "RSVP to +123-456-7890",
        "Olivia Wilson",
        "Baby Shower",
        "Please Join Us For a",
        "In Honoring",
        "23 November, 2021 | 03:00 PM Fauget Hotels"
    ],
    "styles": [
        {
            "color": "#c27b33",
            "font-family": "LilitaOne"
        },
        {
            "color": "#83940f",
            "font-family": "Sensei-Medium"
        },
        {
            "color": "#889818",
            "font-family": "Sensei-Medium"
        },
        {
            "color": "#c27b33",
            "font-family": "LilitaOne"
        },
        {
            "color": "#c27b33",
            "font-family": "LilitaOne"
        },
        {
            "color": "#c27b33",
            "font-family": "LilitaOne"
        }
    ],
    "bbox": [
        [
            0.07112462006079028,
            0.6462006079027356,
            0.3373860182370821,
            0.026747720364741642
        ],
        [
            0.07051671732522796,
            0.38662613981762917,
            0.37264437689969604,
            0.059574468085106386
        ],
        [
            0.07234042553191489,
            0.15623100303951368,
            0.6547112462006079,
            0.12401215805471125
        ],
        [
            0.0662613981762918,
            0.06747720364741641,
            0.3981762917933131,
            0.035866261398176294
        ],
        [
            0.07051671732522796,
            0.31550151975683893,
            0.22006079027355624,
            0.03951367781155015
        ],
        [
            0.06990881458966565,
            0.48328267477203646,
            0.39878419452887537,
            0.1094224924012158
        ]
    ],
    "bg_prompt": "Cards and invitations. The image features a large gray elephant sitting in a field of flowers, holding a smaller elephant in its arms. The scene is quite serene and picturesque, with the two elephants being the main focus of the image. The field is filled with various flowers, creating a beautiful and vibrant backdrop for the elephants. Tags: Light green, orange, Illustration, watercolor, playful, Baby shower invitation, baby boy shower invitation, baby boy, welcoming baby boy, koala baby shower invitation, baby shower invitation for baby shower, baby boy invitation, background, playful baby shower card, baby shower, card, newborn, born, Baby Shirt Baby Shower Invitation",
    "seed": 0
}
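
A small consistency check tying the four examples to demo/constants.py above: the texts, styles, and bbox lists must stay aligned, and their length must not exceed MAX_TEXT_BOX (a sanity sketch, not repo code):

# Sketch: validate the example JSONs against the demo constants.
import json

MAX_TEXT_BOX = 20  # mirrors demo/constants.py

for name in ('easter', 'new_year', 'pancake', 'shower'):
    with open(f'examples/{name}.json') as f:
        ex = json.load(f)
    assert len(ex['texts']) == len(ex['styles']) == len(ex['bbox']) <= MAX_TEXT_BOX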
examples/shower.png
ADDED
glyph_sdxl/custom_diffusers/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .pipelines import *
from .models import *
glyph_sdxl/custom_diffusers/models/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .cross_attn_insert_transformer_blocks import CrossAttnInsertBasicTransformerBlock

__all__ = ['CrossAttnInsertBasicTransformerBlock']
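
Since the package __init__ files re-export via `import *` plus __all__, the custom block is importable one level up (illustrative):

# Sketch: the re-exports above expose the block at the subpackage root.
from glyph_sdxl.custom_diffusers import CrossAttnInsertBasicTransformerBlock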
glyph_sdxl/custom_diffusers/models/cross_attn_insert_transformer_blocks.py
ADDED
@@ -0,0 +1,377 @@
from typing import Optional, Dict, Any
import copy

import torch
import torch.nn as nn

from diffusers.models.attention import (
    BasicTransformerBlock,
    SinusoidalPositionalEmbedding,
    AdaLayerNorm,
    AdaLayerNormZero,
    AdaLayerNormContinuous,
    Attention,
    FeedForward,
    GatedSelfAttentionDense,
    GELU,
    GEGLU,
    ApproximateGELU,
    _chunked_feed_forward,
)

class CrossAttnInsertBasicTransformerBlock(BasicTransformerBlock):
    def __init__(
        self,
        dim: int,
        num_attention_heads: int,
        attention_head_dim: int,
        dropout=0.0,
        cross_attention_dim: Optional[int] = None,
        glyph_cross_attention_dim: Optional[int] = None,
        activation_fn: str = "geglu",
        num_embeds_ada_norm: Optional[int] = None,
        attention_bias: bool = False,
        only_cross_attention: bool = False,
        double_self_attention: bool = False,
        upcast_attention: bool = False,
        norm_elementwise_affine: bool = True,
        norm_type: str = "layer_norm",  # 'layer_norm', 'ada_norm', 'ada_norm_zero', 'ada_norm_single', 'layer_norm_i2vgen'
        norm_eps: float = 1e-5,
        final_dropout: bool = False,
        attention_type: str = "default",
        positional_embeddings: Optional[str] = None,
        num_positional_embeddings: Optional[int] = None,
        ada_norm_continous_conditioning_embedding_dim: Optional[int] = None,
        ada_norm_bias: Optional[int] = None,
        ff_inner_dim: Optional[int] = None,
        ff_bias: bool = True,
        attention_out_bias: bool = True,
    ):
        super(BasicTransformerBlock, self).__init__()
        self.only_cross_attention = only_cross_attention

        if norm_type in ("ada_norm", "ada_norm_zero") and num_embeds_ada_norm is None:
            raise ValueError(
                f"`norm_type` is set to {norm_type}, but `num_embeds_ada_norm` is not defined. Please make sure to"
                f" define `num_embeds_ada_norm` if setting `norm_type` to {norm_type}."
            )

        self.norm_type = norm_type
        self.num_embeds_ada_norm = num_embeds_ada_norm

        if positional_embeddings and (num_positional_embeddings is None):
            raise ValueError(
                "If `positional_embeddings` type is defined, `num_positional_embeddings` must also be defined."
            )

        if positional_embeddings == "sinusoidal":
            self.pos_embed = SinusoidalPositionalEmbedding(dim, max_seq_length=num_positional_embeddings)
        else:
            self.pos_embed = None

        # Define 3 blocks. Each block has its own normalization layer.
        # 1. Self-Attn
        if norm_type == "ada_norm":
            self.norm1 = AdaLayerNorm(dim, num_embeds_ada_norm)
        elif norm_type == "ada_norm_zero":
            self.norm1 = AdaLayerNormZero(dim, num_embeds_ada_norm)
        elif norm_type == "ada_norm_continuous":
            self.norm1 = AdaLayerNormContinuous(
                dim,
                ada_norm_continous_conditioning_embedding_dim,
                norm_elementwise_affine,
                norm_eps,
                ada_norm_bias,
                "rms_norm",
            )
        else:
            self.norm1 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps)

        self.attn1 = Attention(
            query_dim=dim,
            heads=num_attention_heads,
            dim_head=attention_head_dim,
            dropout=dropout,
            bias=attention_bias,
            cross_attention_dim=cross_attention_dim if only_cross_attention else None,
            upcast_attention=upcast_attention,
            out_bias=attention_out_bias,
        )

        # 2. Cross-Attn
        if cross_attention_dim is not None or double_self_attention:
            # We currently only use AdaLayerNormZero for self attention where there will only be one attention block.
            # I.e. the number of returned modulation chunks from AdaLayerZero would not make sense if returned during
            # the second cross attention block.
            if norm_type == "ada_norm":
                self.norm2 = AdaLayerNorm(dim, num_embeds_ada_norm)
            elif norm_type == "ada_norm_continuous":
                self.norm2 = AdaLayerNormContinuous(
                    dim,
                    ada_norm_continous_conditioning_embedding_dim,
                    norm_elementwise_affine,
                    norm_eps,
                    ada_norm_bias,
                    "rms_norm",
                )
            else:
                self.norm2 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine)

            self.attn2 = Attention(
                query_dim=dim,
                cross_attention_dim=cross_attention_dim if not double_self_attention else None,
                heads=num_attention_heads,
                dim_head=attention_head_dim,
                dropout=dropout,
                bias=attention_bias,
                upcast_attention=upcast_attention,
                out_bias=attention_out_bias,
            )  # is self-attn if encoder_hidden_states is none
        else:
            self.norm2 = None
            self.attn2 = None

        # 3. Feed-forward
        if norm_type == "ada_norm_continuous":
            self.norm3 = AdaLayerNormContinuous(
                dim,
                ada_norm_continous_conditioning_embedding_dim,
                norm_elementwise_affine,
                norm_eps,
                ada_norm_bias,
                "layer_norm",
            )
        elif norm_type in ["ada_norm_zero", "ada_norm", "layer_norm", "ada_norm_continuous"]:
            self.norm3 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine)
        elif norm_type == "layer_norm_i2vgen":
            self.norm3 = None

        self.ff = FeedForward(
            dim,
            dropout=dropout,
            activation_fn=activation_fn,
            final_dropout=final_dropout,
            inner_dim=ff_inner_dim,
            bias=ff_bias,
        )

        # 4. Fuser
        if attention_type == "gated" or attention_type == "gated-text-image":
            self.fuser = GatedSelfAttentionDense(dim, cross_attention_dim, num_attention_heads, attention_head_dim)

        # 5. Scale-shift for PixArt-Alpha.
        if norm_type == "ada_norm_single":
            self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5)

        # let chunk size default to None
        self._chunk_size = None
        self._chunk_dim = 0

    def get_inserted_modules(self):
        return ()

    def get_inserted_modules_names(self):
        return ()

    def get_origin_modules(self):
        inserted_modules = self.get_inserted_modules()
        origin_modules = []
        for module in self.children():
            if module not in inserted_modules:
                origin_modules.append(module)
        return tuple(origin_modules)

    @classmethod
    def from_transformer_block(
        cls,
        transformer_block,
        glyph_cross_attention_dim,
    ):
        inner_dim = transformer_block.attn1.query_dim
        num_attention_heads = transformer_block.attn1.heads
        attention_head_dim = transformer_block.attn1.inner_dim // num_attention_heads
        dropout = transformer_block.attn1.dropout
        cross_attention_dim = transformer_block.attn2.cross_attention_dim
        if isinstance(transformer_block.ff.net[0], GELU):
            if transformer_block.ff.net[0].approximate == "tanh":
                activation_fn = "gelu-approximate"
            else:
                activation_fn = "gelu"
        elif isinstance(transformer_block.ff.net[0], GEGLU):
            activation_fn = "geglu"
        elif isinstance(transformer_block.ff.net[0], ApproximateGELU):
            activation_fn = "geglu-approximate"
        num_embeds_ada_norm = transformer_block.num_embeds_ada_norm
        attention_bias = transformer_block.attn1.to_q.bias is not None
        only_cross_attention = transformer_block.only_cross_attention
        double_self_attention = transformer_block.attn2.cross_attention_dim is None
        upcast_attention = transformer_block.attn1.upcast_attention
        norm_type = transformer_block.norm_type
        assert isinstance(transformer_block.norm1, nn.LayerNorm)
        norm_elementwise_affine = transformer_block.norm1.elementwise_affine
        norm_eps = transformer_block.norm1.eps
        assert getattr(transformer_block, 'fuser', None) is None
        attention_type = "default"
        model = cls(
            inner_dim,
            num_attention_heads,
            attention_head_dim,
            dropout=dropout,
            cross_attention_dim=cross_attention_dim,
            glyph_cross_attention_dim=glyph_cross_attention_dim,
            activation_fn=activation_fn,
            num_embeds_ada_norm=num_embeds_ada_norm,
            attention_bias=attention_bias,
            only_cross_attention=only_cross_attention,
            double_self_attention=double_self_attention,
            upcast_attention=upcast_attention,
            norm_type=norm_type,
            norm_elementwise_affine=norm_elementwise_affine,
            norm_eps=norm_eps,
            attention_type=attention_type,
        )
        missing_keys, unexpected_keys = model.load_state_dict(
            transformer_block.state_dict(),
            strict=False,
        )
        assert len(unexpected_keys) == 0
        assert all(i.startswith('glyph') for i in missing_keys)

        return model

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        timestep: Optional[torch.LongTensor] = None,
        cross_attention_kwargs: Dict[str, Any] = None,
        class_labels: Optional[torch.LongTensor] = None,
        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
    ) -> torch.FloatTensor:
        # Notice that normalization is always applied before the real computation in the following blocks.
        # 0. Self-Attention
        batch_size = hidden_states.shape[0]

        if self.norm_type == "ada_norm":
            norm_hidden_states = self.norm1(hidden_states, timestep)
        elif self.norm_type == "ada_norm_zero":
            norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
                hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
            )
        elif self.norm_type in ["layer_norm", "layer_norm_i2vgen"]:
            norm_hidden_states = self.norm1(hidden_states)
        elif self.norm_type == "ada_norm_continuous":
            norm_hidden_states = self.norm1(hidden_states, added_cond_kwargs["pooled_text_emb"])
        elif self.norm_type == "ada_norm_single":
            shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (
                self.scale_shift_table[None] + timestep.reshape(batch_size, 6, -1)
            ).chunk(6, dim=1)
            norm_hidden_states = self.norm1(hidden_states)
            norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa
            norm_hidden_states = norm_hidden_states.squeeze(1)
        else:
            raise ValueError("Incorrect norm used")

        if self.pos_embed is not None:
            norm_hidden_states = self.pos_embed(norm_hidden_states)

        # 1. Retrieve lora scale.
        lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0

        # 2. Prepare GLIGEN inputs
        cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
        gligen_kwargs = cross_attention_kwargs.pop("gligen", None)

        glyph_encoder_hidden_states = cross_attention_kwargs.pop("glyph_encoder_hidden_states", None)
        # a dict: visual_feat_len -> tensor(b, visual_feat_len, text_feat_len)
        glyph_attn_mask = cross_attention_kwargs.pop("glyph_attn_masks_dict", None)
        bg_attn_mask = cross_attention_kwargs.pop("bg_attn_masks_dict", None)
        if glyph_attn_mask is not None:
            glyph_attn_mask = glyph_attn_mask[hidden_states.shape[1]]
        if bg_attn_mask is not None:
            bg_attn_mask = bg_attn_mask[hidden_states.shape[1]]
        assert encoder_attention_mask is None, "encoder_attention_mask is not supported in this block."

        attn_output = self.attn1(
            norm_hidden_states,
            encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
            attention_mask=attention_mask,
            **cross_attention_kwargs,
        )
        if self.norm_type == "ada_norm_zero":
            attn_output = gate_msa.unsqueeze(1) * attn_output
        elif self.norm_type == "ada_norm_single":
            attn_output = gate_msa * attn_output

        hidden_states = attn_output + hidden_states
        if hidden_states.ndim == 4:
            hidden_states = hidden_states.squeeze(1)

        # 2.5 GLIGEN Control
        if gligen_kwargs is not None:
            hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])

        # 3. Cross-Attention
        if self.attn2 is not None:
            if self.norm_type == "ada_norm":
                norm_hidden_states = self.norm2(hidden_states, timestep)
            elif self.norm_type in ["ada_norm_zero", "layer_norm", "layer_norm_i2vgen"]:
                norm_hidden_states = self.norm2(hidden_states)
            elif self.norm_type == "ada_norm_single":
                # For PixArt norm2 isn't applied here:
                # https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L70C1-L76C103
                norm_hidden_states = hidden_states
            elif self.norm_type == "ada_norm_continuous":
                norm_hidden_states = self.norm2(hidden_states, added_cond_kwargs["pooled_text_emb"])
            else:
                raise ValueError("Incorrect norm")

            if self.pos_embed is not None and self.norm_type != "ada_norm_single":
                norm_hidden_states = self.pos_embed(norm_hidden_states)

            attn_output = self.attn2(
                norm_hidden_states,
                encoder_hidden_states=torch.cat([encoder_hidden_states, glyph_encoder_hidden_states], dim=1),
                attention_mask=torch.cat([bg_attn_mask, glyph_attn_mask], dim=-1),
                **cross_attention_kwargs,
            )

            hidden_states = attn_output + hidden_states

        # 4. Feed-forward
        # i2vgen doesn't have this norm 🤷♂️
        if self.norm_type == "ada_norm_continuous":
            norm_hidden_states = self.norm3(hidden_states, added_cond_kwargs["pooled_text_emb"])
        elif not self.norm_type == "ada_norm_single":
            norm_hidden_states = self.norm3(hidden_states)

        if self.norm_type == "ada_norm_zero":
            norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]

        if self.norm_type == "ada_norm_single":
            norm_hidden_states = self.norm2(hidden_states)
            norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp

        if self._chunk_size is not None:
            # "feed_forward_chunk_size" can be used to save memory
            ff_output = _chunked_feed_forward(
                self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size, lora_scale=lora_scale
            )
        else:
            ff_output = self.ff(norm_hidden_states, scale=lora_scale)

        if self.norm_type == "ada_norm_zero":
            ff_output = gate_mlp.unsqueeze(1) * ff_output
        elif self.norm_type == "ada_norm_single":
            ff_output = gate_mlp * ff_output

        hidden_states = ff_output + hidden_states
        if hidden_states.ndim == 4:
            hidden_states = hidden_states.squeeze(1)

        return hidden_states
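As a hedged illustration of how the file above is meant to be used (not code from this commit): from_transformer_block builds a glyph-aware block from a stock diffusers BasicTransformerBlock and reuses its weights, so an SDXL UNet can be converted in place. The glyph_cross_attention_dim value below (1472, the ByT5-small hidden size) and the conversion loop are assumptions for illustration; the actual wiring in this Space lives in app.py.

# Illustrative sketch, not part of this commit: swap every stock
# BasicTransformerBlock in an SDXL UNet for the glyph-aware variant,
# reusing the original weights via from_transformer_block.
from diffusers import UNet2DConditionModel
from diffusers.models.attention import BasicTransformerBlock

from glyph_sdxl.custom_diffusers.models import CrossAttnInsertBasicTransformerBlock

unet = UNet2DConditionModel.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet"
)

# Collect target names first so the module tree is not mutated mid-iteration.
targets = [name for name, m in unet.named_modules() if isinstance(m, BasicTransformerBlock)]
for name in targets:
    parent_name, _, child_name = name.rpartition(".")
    parent = unet.get_submodule(parent_name)
    block = unet.get_submodule(name)
    setattr(parent, child_name, CrossAttnInsertBasicTransformerBlock.from_transformer_block(
        block, glyph_cross_attention_dim=1472,  # assumed ByT5-small mapper width
    ))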
glyph_sdxl/custom_diffusers/pipelines/__init__.py
ADDED
@@ -0,0 +1,5 @@
from .pipeline_stable_diffusion_glyph_xl import StableDiffusionGlyphXLPipeline

__all__ = [
    'StableDiffusionGlyphXLPipeline',
]
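The pipeline exported here handles prompts longer than CLIP's 77-token window by slicing them into bos/eos-wrapped 77-token segments inside encode_prompt (see the file below). A standalone sketch of that segmentation, with illustrative token ids (49406/49407 stand in for CLIP's bos/eos):

# Standalone sketch of the 77-token segmentation used in encode_prompt below.
def split_into_77_token_segments(token_ids, bos, eos, pad):
    body = list(token_ids[1:-1])      # strip the original bos/eos
    segments, empty = [], True
    while len(body) >= 75:
        head = [body.pop(0) for _ in range(75)]
        segments.append([bos] + head + [eos])   # each full segment: 77 ids
        empty = False
    if body or empty:
        # pad the final (or empty) remainder out to exactly 77 ids
        segments.append([bos] + body + [eos] + [pad] * (75 - len(body)))
    return segments

print(len(split_into_77_token_segments(list(range(200)), bos=49406, eos=49407, pad=0)))  # -> 3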
glyph_sdxl/custom_diffusers/pipelines/pipeline_stable_diffusion_glyph_xl.py
ADDED
@@ -0,0 +1,922 @@
from typing import Optional, List, Union, Dict, Tuple, Callable, Any
import torch

from transformers import T5EncoderModel, T5Tokenizer
import torch.nn.functional as F

from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import (
    StableDiffusionXLPipeline,
    AutoencoderKL,
    CLIPTextModel,
    CLIPTextModelWithProjection,
    CLIPTokenizer,
    UNet2DConditionModel,
    KarrasDiffusionSchedulers,
    CLIPVisionModelWithProjection,
    CLIPImageProcessor,
    VaeImageProcessor,
    is_invisible_watermark_available,
    StableDiffusionXLLoraLoaderMixin,
    PipelineImageInput,
    adjust_lora_scale_text_encoder,
    scale_lora_layers,
    unscale_lora_layers,
    USE_PEFT_BACKEND,
    StableDiffusionXLPipelineOutput,
    ImageProjection,
    logging,
    rescale_noise_cfg,
    retrieve_timesteps,
    deprecate,
)
import numpy as np
logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker

class StableDiffusionGlyphXLPipeline(StableDiffusionXLPipeline):
    model_cpu_offload_seq = "text_encoder->text_encoder_2->byt5_text_encoder->image_encoder->unet->byt5_mapper->vae"
    _optional_components = [
        "tokenizer",
        "tokenizer_2",
        "byt5_tokenizer",
        "text_encoder",
        "text_encoder_2",
        "byt5_text_encoder",
        "byt5_mapper",
        "image_encoder",
        "feature_extractor",
    ]
    _callback_tensor_inputs = [
        "latents",
        "prompt_embeds",
        "negative_prompt_embeds",
        "add_text_embeds",
        "add_time_ids",
        "negative_pooled_prompt_embeds",
        "negative_add_time_ids",
    ]

    def __init__(
        self,
        vae: AutoencoderKL,
        text_encoder: CLIPTextModel,
        text_encoder_2: CLIPTextModelWithProjection,
        byt5_text_encoder: T5EncoderModel,
        tokenizer: CLIPTokenizer,
        tokenizer_2: CLIPTokenizer,
        byt5_tokenizer: T5Tokenizer,
        byt5_mapper,
        unet: UNet2DConditionModel,
        scheduler: KarrasDiffusionSchedulers,
        byt5_max_length: int = 512,
        image_encoder: CLIPVisionModelWithProjection = None,
        feature_extractor: CLIPImageProcessor = None,
        force_zeros_for_empty_prompt: bool = True,
        add_watermarker: Optional[bool] = None,
    ):
        super(StableDiffusionXLPipeline, self).__init__()

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            text_encoder_2=text_encoder_2,
            byt5_text_encoder=byt5_text_encoder,
            tokenizer=tokenizer,
            tokenizer_2=tokenizer_2,
            byt5_tokenizer=byt5_tokenizer,
            byt5_mapper=byt5_mapper,
            unet=unet,
            scheduler=scheduler,
            image_encoder=image_encoder,
            feature_extractor=feature_extractor,
        )
        self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
        self.register_to_config(byt5_max_length=byt5_max_length)
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
        self.byt5_max_length = byt5_max_length

        self.default_sample_size = self.unet.config.sample_size

        add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()

        if add_watermarker:
            self.watermark = StableDiffusionXLWatermarker()
        else:
            self.watermark = None

    def encode_prompt(
        self,
        prompt: str,
        prompt_2: Optional[str] = None,
        text_prompt=None,
        device: Optional[torch.device] = None,
        num_images_per_prompt: int = 1,
        do_classifier_free_guidance: bool = True,
        negative_prompt: Optional[str] = None,
        negative_prompt_2: Optional[str] = None,
        prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
        lora_scale: Optional[float] = None,
        clip_skip: Optional[int] = None,
        text_attn_mask: Optional[torch.LongTensor] = None,
        byt5_prompt_embeds: Optional[torch.FloatTensor] = None,
    ):
        r"""
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                prompt to be encoded
            prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
                used in both text-encoders
            device: (`torch.device`):
                torch device
            num_images_per_prompt (`int`):
                number of images that should be generated per prompt
            do_classifier_free_guidance (`bool`):
                whether to use classifier free guidance or not
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            negative_prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
                `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
                If not provided, pooled text embeddings will be generated from `prompt` input argument.
            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
                input argument.
            lora_scale (`float`, *optional*):
                A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
            clip_skip (`int`, *optional*):
                Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                the output of the pre-final layer will be used for computing the prompt embeddings.
        """
        device = device or self._execution_device

        # set lora scale so that monkey patched LoRA
        # function of text encoder can correctly access it
        if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin):
            self._lora_scale = lora_scale

            # dynamically adjust the LoRA scale
            if self.text_encoder is not None:
                if not USE_PEFT_BACKEND:
                    adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
                else:
                    scale_lora_layers(self.text_encoder, lora_scale)

            if self.text_encoder_2 is not None:
                if not USE_PEFT_BACKEND:
                    adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
                else:
                    scale_lora_layers(self.text_encoder_2, lora_scale)

        prompt = [prompt] if isinstance(prompt, str) else prompt

        if prompt is not None:
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        # Define tokenizers and text encoders
        tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
        text_encoders = (
            [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
        )

        if prompt_embeds is None:
            assert len(prompt) == 1
            prompt_2 = prompt_2 or prompt
            prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2

            text_prompt = [text_prompt] if isinstance(text_prompt, str) else text_prompt

            # textual inversion: process multi-vector tokens if necessary
            prompt_embeds_list = []
            prompts = [prompt, prompt_2]
            text_input_id_batchs = []
            for prompt, tokenizer in zip(prompts, tokenizers):
                pad_token = tokenizer.pad_token_id
                total_tokens = tokenizer(prompt, truncation=False)['input_ids'][0]
                bos = total_tokens[0]
                eos = total_tokens[-1]
                total_tokens = total_tokens[1:-1]
                new_total_tokens = []
                empty_flag = True
                while len(total_tokens) >= 75:
                    head_75_tokens = [total_tokens.pop(0) for _ in range(75)]
                    temp_77_token_ids = [bos] + head_75_tokens + [eos]
                    new_total_tokens.append(temp_77_token_ids)
                    empty_flag = False
                if len(total_tokens) > 0 or empty_flag:
                    padding_len = 75 - len(total_tokens)
                    temp_77_token_ids = [bos] + total_tokens + [eos] + [pad_token] * padding_len
                    new_total_tokens.append(temp_77_token_ids)
                # 1, segment_len, 77
                new_total_tokens = torch.tensor(new_total_tokens, dtype=torch.long).unsqueeze(0)
                text_input_id_batchs.append(new_total_tokens)
            if text_input_id_batchs[0].shape[1] > text_input_id_batchs[1].shape[1]:
                tokenizer = tokenizers[1]
                pad_token = tokenizer.pad_token_id
                bos = tokenizer.bos_token_id
                eos = tokenizer.eos_token_id
                padding_len = text_input_id_batchs[0].shape[1] - text_input_id_batchs[1].shape[1]
                # padding_len, 77
                padding_part = torch.tensor([[bos] + [eos] + [pad_token] * 75 for _ in range(padding_len)])
                # 1, padding_len, 77
                padding_part = padding_part.unsqueeze(0)
                text_input_id_batchs[1] = torch.cat((text_input_id_batchs[1], padding_part), dim=1)
            elif text_input_id_batchs[0].shape[1] < text_input_id_batchs[1].shape[1]:
                tokenizer = tokenizers[0]
                pad_token = tokenizer.pad_token_id
                bos = tokenizer.bos_token_id
                eos = tokenizer.eos_token_id
                padding_len = text_input_id_batchs[1].shape[1] - text_input_id_batchs[0].shape[1]
                # padding_len, 77
                padding_part = torch.tensor([[bos] + [eos] + [pad_token] * 75 for _ in range(padding_len)])
                # 1, padding_len, 77
                padding_part = padding_part.unsqueeze(0)
                text_input_id_batchs[0] = torch.cat((text_input_id_batchs[0], padding_part), dim=1)

            embeddings = []
            for segment_idx in range(text_input_id_batchs[0].shape[1]):
                prompt_embeds_list = []
                for i, text_encoder in enumerate(text_encoders):
                    # 1, segment_len, sequence_len
                    text_input_ids = text_input_id_batchs[i].to(text_encoder.device)
                    # 1, sequence_len, dim
                    prompt_embeds = text_encoder(
                        text_input_ids[:, segment_idx],
                        output_hidden_states=True,
                    )

                    # We are only ALWAYS interested in the pooled output of the final text encoder
                    temp_pooled_prompt_embeds = prompt_embeds[0]
                    if clip_skip is None:
                        prompt_embeds = prompt_embeds.hidden_states[-2]
                    else:
                        prompt_embeds = prompt_embeds.hidden_states[-(clip_skip + 2)]
                    bs_embed, seq_len, _ = prompt_embeds.shape
                    prompt_embeds = prompt_embeds.view(bs_embed, seq_len, -1)
                    prompt_embeds_list.append(prompt_embeds)
                # b, sequence_len, dim
                prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)
                embeddings.append(prompt_embeds)
                if segment_idx == 0:
                    # use the first segment's pooled prompt embeddings as
                    # the pooled prompt embeddings
                    # b, dim -> b, dim
                    pooled_prompt_embeds = temp_pooled_prompt_embeds.view(bs_embed, -1)
            # b, segment_len * sequence_len, dim
            prompt_embeds = torch.cat(embeddings, dim=1)

        if byt5_prompt_embeds is None:
            byt5_text_inputs = self.byt5_tokenizer(
                text_prompt,
                padding="max_length",
                max_length=self.byt5_max_length,
                truncation=True,
                add_special_tokens=True,
                return_tensors="pt",
            )
            byt5_text_input_ids = byt5_text_inputs.input_ids
            byt5_attention_mask = byt5_text_inputs.attention_mask.to(self.byt5_text_encoder.device) if text_attn_mask is None else text_attn_mask.to(self.byt5_text_encoder.device, dtype=byt5_text_inputs.attention_mask.dtype)
            with torch.cuda.amp.autocast(enabled=False):
                byt5_prompt_embeds = self.byt5_text_encoder(
                    byt5_text_input_ids.to(self.byt5_text_encoder.device),
                    attention_mask=byt5_attention_mask.float(),
                )
                byt5_prompt_embeds = byt5_prompt_embeds[0]
                byt5_prompt_embeds = self.byt5_mapper(byt5_prompt_embeds, byt5_attention_mask)

        # get unconditional embeddings for classifier free guidance
        zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
        if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
            negative_prompt_embeds = torch.zeros_like(prompt_embeds)
            negative_byt5_prompt_embeds = torch.zeros_like(byt5_prompt_embeds)
            negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
        elif do_classifier_free_guidance and negative_prompt_embeds is None:
            raise NotImplementedError

        if self.text_encoder_2 is not None:
            prompt_embeds = prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
        else:
            prompt_embeds = prompt_embeds.to(dtype=self.unet.dtype, device=device)

        bs_embed, seq_len, _ = prompt_embeds.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
        prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)

        if do_classifier_free_guidance:
            # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
            seq_len = negative_prompt_embeds.shape[1]
            byt5_seq_len = negative_byt5_prompt_embeds.shape[1]

            if self.text_encoder_2 is not None:
                negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
            else:
                negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.unet.dtype, device=device)
            negative_byt5_prompt_embeds = negative_byt5_prompt_embeds.to(dtype=self.byt5_text_encoder.dtype, device=device)

            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
            negative_byt5_prompt_embeds = negative_byt5_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_byt5_prompt_embeds = negative_byt5_prompt_embeds.view(batch_size * num_images_per_prompt, byt5_seq_len, -1)

        pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
            bs_embed * num_images_per_prompt, -1
        )
        if do_classifier_free_guidance:
            negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
                bs_embed * num_images_per_prompt, -1
            )

        if self.text_encoder is not None:
            if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
                # Retrieve the original scale by scaling back the LoRA layers
                unscale_lora_layers(self.text_encoder, lora_scale)

        if self.text_encoder_2 is not None:
            if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
                # Retrieve the original scale by scaling back the LoRA layers
                unscale_lora_layers(self.text_encoder_2, lora_scale)

        return (
            prompt_embeds,
            negative_prompt_embeds,
            pooled_prompt_embeds,
            negative_pooled_prompt_embeds,
            byt5_prompt_embeds,
            negative_byt5_prompt_embeds,
        )

    @torch.no_grad()
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
        prompt_2: Optional[Union[str, List[str]]] = None,
        text_prompt=None,
        texts=None,
        bboxes=None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        num_inference_steps: int = 50,
        timesteps: List[int] = None,
        denoising_end: Optional[float] = None,
        guidance_scale: float = 5.0,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        negative_prompt_2: Optional[Union[str, List[str]]] = None,
        num_images_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.FloatTensor] = None,
        prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
        ip_adapter_image: Optional[PipelineImageInput] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        guidance_rescale: float = 0.0,
        original_size: Optional[Tuple[int, int]] = None,
        crops_coords_top_left: Tuple[int, int] = (0, 0),
        target_size: Optional[Tuple[int, int]] = None,
        negative_original_size: Optional[Tuple[int, int]] = None,
        negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
        negative_target_size: Optional[Tuple[int, int]] = None,
        clip_skip: Optional[int] = None,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
        callback_on_step_end_tensor_inputs: List[str] = ["latents"],
        text_attn_mask: torch.LongTensor = None,
        denoising_start: Optional[float] = None,
        byt5_prompt_embeds: Optional[torch.FloatTensor] = None,
        **kwargs,
    ):
        r"""
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
                instead.
            prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
                used in both text-encoders
            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated image. This is set to 1024 by default for the best results.
                Anything below 512 pixels won't work well for
                [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
                and checkpoints that are not specifically fine-tuned on low resolutions.
            width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The width in pixels of the generated image. This is set to 1024 by default for the best results.
                Anything below 512 pixels won't work well for
                [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
                and checkpoints that are not specifically fine-tuned on low resolutions.
            num_inference_steps (`int`, *optional*, defaults to 50):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            timesteps (`List[int]`, *optional*):
                Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
                in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                passed will be used. Must be in descending order.
            denoising_end (`float`, *optional*):
                When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
                completed before it is intentionally prematurely terminated. As a result, the returned sample will
                still retain a substantial amount of noise as determined by the discrete timesteps selected by the
                scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
                "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
                Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
            guidance_scale (`float`, *optional*, defaults to 5.0):
                Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
                `guidance_scale` is defined as `w` of equation 2. of [Imagen
                Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
                1`. Higher guidance scale encourages generating images that are closely linked to the text `prompt`,
                usually at the expense of lower image quality.
            negative_prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            negative_prompt_2 (`str` or `List[str]`, *optional*):
                The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
                `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
                [`schedulers.DDIMScheduler`], will be ignored for others.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
                If not provided, pooled text embeddings will be generated from `prompt` input argument.
            negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
                input argument.
            ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
                of a plain tuple.
            cross_attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                `self.processor` in
                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            guidance_rescale (`float`, *optional*, defaults to 0.0):
                Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
                Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
                [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
                Guidance rescale factor should fix overexposure when using zero terminal SNR.
            original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
                `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
                explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
            crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
                `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
                `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
                `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
            target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                For most cases, `target_size` should be set to the desired height and width of the generated image. If
                not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
                section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
            negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                To negatively condition the generation process based on a specific image resolution. Part of SDXL's
                micro-conditioning as explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
            negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
                To negatively condition the generation process based on specific crop coordinates. Part of SDXL's
                micro-conditioning as explained in section 2.2 of
                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
            negative_target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
                To negatively condition the generation process based on a target image resolution. It should be the
                same as the `target_size` for most cases. Part of SDXL's micro-conditioning as explained in section 2.2
                of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
                information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
            callback_on_step_end (`Callable`, *optional*):
                A function that is called at the end of each denoising step during inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.

        Examples:

        Returns:
            [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or `tuple`:
            [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
            `tuple`. When returning a tuple, the first element is a list with the generated images.
        """

        callback = kwargs.pop("callback", None)
        callback_steps = kwargs.pop("callback_steps", None)

        if callback is not None:
            deprecate(
                "callback",
                "1.0.0",
                "Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
            )
        if callback_steps is not None:
            deprecate(
                "callback_steps",
                "1.0.0",
                "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`",
            )

        # 0. Default height and width to unet
        height = height or self.default_sample_size * self.vae_scale_factor
        width = width or self.default_sample_size * self.vae_scale_factor

        original_size = original_size or (height, width)
        target_size = target_size or (height, width)

        # 1. Check inputs. Raise error if not correct
        self.check_inputs(
            prompt,
            prompt_2,
            height,
            width,
            callback_steps,
            negative_prompt,
            negative_prompt_2,
            prompt_embeds,
            negative_prompt_embeds,
            pooled_prompt_embeds,
            negative_pooled_prompt_embeds,
            callback_on_step_end_tensor_inputs,
        )

        self._guidance_scale = guidance_scale
        self._guidance_rescale = guidance_rescale
        self._clip_skip = clip_skip
        self._cross_attention_kwargs = cross_attention_kwargs
        self._denoising_end = denoising_end
        self._interrupt = False

        # 2. Define call parameters
        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)
        else:
            batch_size = prompt_embeds.shape[0]

        device = self._execution_device

        # 3. Encode input prompt
        lora_scale = (
            self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
        )

        (
            prompt_embeds,
            negative_prompt_embeds,
            pooled_prompt_embeds,
            negative_pooled_prompt_embeds,
            byt5_prompt_embeds,
            negative_byt5_prompt_embeds,
        ) = self.encode_prompt(
            prompt=prompt,
            prompt_2=prompt_2,
            text_prompt=text_prompt,
            device=device,
            num_images_per_prompt=num_images_per_prompt,
            do_classifier_free_guidance=self.do_classifier_free_guidance,
            negative_prompt=negative_prompt,
            negative_prompt_2=negative_prompt_2,
            prompt_embeds=prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
            pooled_prompt_embeds=pooled_prompt_embeds,
            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
            lora_scale=lora_scale,
            clip_skip=self.clip_skip,
            text_attn_mask=text_attn_mask,
            byt5_prompt_embeds=byt5_prompt_embeds,
        )

        # 4. Prepare timesteps
        timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)

        # 5. Prepare latent variables
        num_channels_latents = self.unet.config.in_channels
        latents = self.prepare_latents(
            batch_size * num_images_per_prompt,
            num_channels_latents,
            height,
            width,
            prompt_embeds.dtype,
            device,
            generator,
            latents,
        )

        # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

        # 7. Prepare added time ids & embeddings
        add_text_embeds = pooled_prompt_embeds
        if self.text_encoder_2 is None:
            text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
        else:
            text_encoder_projection_dim = self.text_encoder_2.config.projection_dim

        add_time_ids = self._get_add_time_ids(
            original_size,
            crops_coords_top_left,
            target_size,
            dtype=prompt_embeds.dtype,
            text_encoder_projection_dim=text_encoder_projection_dim,
        )
        if negative_original_size is not None and negative_target_size is not None:
            negative_add_time_ids = self._get_add_time_ids(
                negative_original_size,
                negative_crops_coords_top_left,
                negative_target_size,
                dtype=prompt_embeds.dtype,
                text_encoder_projection_dim=text_encoder_projection_dim,
            )
        else:
            negative_add_time_ids = add_time_ids

        if self.do_classifier_free_guidance:
            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
            byt5_prompt_embeds = torch.cat([negative_byt5_prompt_embeds, byt5_prompt_embeds], dim=0)

            add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
            add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

        prompt_embeds = prompt_embeds.to(device)
        byt5_prompt_embeds = byt5_prompt_embeds.to(device)
        add_text_embeds = add_text_embeds.to(device)
        add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)

        if ip_adapter_image is not None:
            output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
            image_embeds, negative_image_embeds = self.encode_image(
                ip_adapter_image, device, num_images_per_prompt, output_hidden_state
            )
            if self.do_classifier_free_guidance:
                image_embeds = torch.cat([negative_image_embeds, image_embeds])
                image_embeds = image_embeds.to(device)

        # 8. Denoising loop
        num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)

        # 8.1 Apply denoising_end
        if (
            self.denoising_end is not None
            and isinstance(self.denoising_end, float)
            and self.denoising_end > 0
            and self.denoising_end < 1
        ):
            discrete_timestep_cutoff = int(
                round(
                    self.scheduler.config.num_train_timesteps
                    - (self.denoising_end * self.scheduler.config.num_train_timesteps)
                )
            )
            num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
            timesteps = timesteps[:num_inference_steps]

        # 9. Optionally get Guidance Scale Embedding
        timestep_cond = None
        if self.unet.config.time_cond_proj_dim is not None:
            guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
            timestep_cond = self.get_guidance_scale_embedding(
                guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
            ).to(device=device, dtype=latents.dtype)

        assert batch_size == 1, "batch_size > 1 is not supported"
        if texts is not None:
            glyph_attn_mask = self.get_glyph_attn_mask(texts, bboxes)
            # h, w
            bg_attn_mask = glyph_attn_mask.sum(-1) == 0
            # 1, h, w, byt5_max_len
            glyph_attn_masks = glyph_attn_mask.unsqueeze(0).to(device)
            # 1, h, w
            bg_attn_masks = bg_attn_mask.unsqueeze(0).to(glyph_attn_masks.dtype).to(device)

            # b, h, w, text_feat_len
            glyph_attn_masks = (1 - glyph_attn_masks) * -10000.0
            # b, h, w
            bg_attn_masks = (1 - bg_attn_masks) * -10000.0
            num_down_sample = sum(1 if i == 'CrossAttnDownBlock2D' else 0 for i in self.unet.config['down_block_types']) - 1
            initial_resolution = self.default_sample_size
            initial_resolution = initial_resolution // 2**sum(1 if i == 'DownBlock2D' else 0 for i in self.unet.config['down_block_types'])
            resolution_list = [initial_resolution] + [initial_resolution // 2**i for i in range(1, num_down_sample + 1)]
            glyph_attn_masks_dict = dict()
            bg_attn_masks_dict = dict()
            # b, text_feat_len, h, w
            glyph_attn_masks = glyph_attn_masks.permute(0, 3, 1, 2)
            # b, 1, h, w
            bg_attn_masks = bg_attn_masks.unsqueeze(1)
            for mask_resolution in resolution_list:
                down_scaled_glyph_attn_masks = F.interpolate(
|
754 |
+
glyph_attn_masks, size=(mask_resolution, mask_resolution), mode='nearest',
|
755 |
+
)
|
756 |
+
# b, text_fet_len, h, w->b, h, w, text_fet_len->b, h*w, text_fet_len
|
757 |
+
down_scaled_glyph_attn_masks = down_scaled_glyph_attn_masks.permute(0, 2, 3, 1).flatten(1, 2)
|
758 |
+
glyph_attn_masks_dict[mask_resolution * mask_resolution] = down_scaled_glyph_attn_masks
|
759 |
+
|
760 |
+
down_scaled_bg_attn_masks = F.interpolate(
|
761 |
+
bg_attn_masks, size=(mask_resolution, mask_resolution), mode='nearest',
|
762 |
+
)
|
763 |
+
# b,1,h,w->b,h,w->b,h,w,1->b,h*w,1->b,h*w,clip_feat_len
|
764 |
+
down_scaled_bg_attn_masks = down_scaled_bg_attn_masks.squeeze(1).unsqueeze(-1)
|
765 |
+
down_scaled_bg_attn_masks = down_scaled_bg_attn_masks.flatten(1, 2)
|
766 |
+
down_scaled_bg_attn_masks = down_scaled_bg_attn_masks.repeat(1, 1, prompt_embeds.shape[1])
|
767 |
+
bg_attn_masks_dict[mask_resolution * mask_resolution] = down_scaled_bg_attn_masks
|
768 |
+
if self.do_classifier_free_guidance:
|
769 |
+
for key in glyph_attn_masks_dict:
|
770 |
+
glyph_attn_masks_dict[key] = torch.cat([
|
771 |
+
torch.zeros_like(glyph_attn_masks_dict[key]),
|
772 |
+
glyph_attn_masks_dict[key]],
|
773 |
+
dim=0)
|
774 |
+
for key in bg_attn_masks_dict:
|
775 |
+
bg_attn_masks_dict[key] = torch.cat([
|
776 |
+
torch.zeros_like(bg_attn_masks_dict[key]),
|
777 |
+
bg_attn_masks_dict[key]],
|
778 |
+
dim=0)
|
779 |
+
else:
|
780 |
+
glyph_attn_masks_dict = None
|
781 |
+
bg_attn_masks_dict = None
|
782 |
+
|
783 |
+
self._num_timesteps = len(timesteps)
|
784 |
+
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
785 |
+
for i, t in enumerate(timesteps):
|
786 |
+
if self.interrupt:
|
787 |
+
continue
|
788 |
+
|
789 |
+
# expand the latents if we are doing classifier free guidance
|
790 |
+
latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
|
791 |
+
|
792 |
+
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
|
793 |
+
|
794 |
+
# predict the noise residual
|
795 |
+
added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
|
796 |
+
if ip_adapter_image is not None:
|
797 |
+
added_cond_kwargs["image_embeds"] = image_embeds
|
798 |
+
if self.cross_attention_kwargs is None:
|
799 |
+
cross_attention_kwargs = {}
|
800 |
+
else:
|
801 |
+
cross_attention_kwargs = self.cross_attention_kwargs
|
802 |
+
cross_attention_kwargs['glyph_encoder_hidden_states'] = byt5_prompt_embeds
|
803 |
+
cross_attention_kwargs['glyph_attn_masks_dict'] = glyph_attn_masks_dict
|
804 |
+
cross_attention_kwargs['bg_attn_masks_dict'] = bg_attn_masks_dict
|
805 |
+
|
806 |
+
noise_pred = self.unet(
|
807 |
+
latent_model_input,
|
808 |
+
t,
|
809 |
+
encoder_hidden_states=prompt_embeds,
|
810 |
+
timestep_cond=timestep_cond,
|
811 |
+
cross_attention_kwargs=cross_attention_kwargs,
|
812 |
+
added_cond_kwargs=added_cond_kwargs,
|
813 |
+
return_dict=False,
|
814 |
+
)[0]
|
815 |
+
|
816 |
+
# perform guidance
|
817 |
+
if self.do_classifier_free_guidance:
|
818 |
+
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
819 |
+
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
820 |
+
|
821 |
+
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
822 |
+
# Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
|
823 |
+
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
824 |
+
|
825 |
+
# compute the previous noisy sample x_t -> x_t-1
|
826 |
+
latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
|
827 |
+
|
828 |
+
if callback_on_step_end is not None:
|
829 |
+
callback_kwargs = {}
|
830 |
+
for k in callback_on_step_end_tensor_inputs:
|
831 |
+
callback_kwargs[k] = locals()[k]
|
832 |
+
callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
|
833 |
+
|
834 |
+
latents = callback_outputs.pop("latents", latents)
|
835 |
+
prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
|
836 |
+
negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
|
837 |
+
add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds)
|
838 |
+
negative_pooled_prompt_embeds = callback_outputs.pop(
|
839 |
+
"negative_pooled_prompt_embeds", negative_pooled_prompt_embeds
|
840 |
+
)
|
841 |
+
add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids)
|
842 |
+
negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids)
|
843 |
+
|
844 |
+
# call the callback, if provided
|
845 |
+
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
|
846 |
+
progress_bar.update()
|
847 |
+
if callback is not None and i % callback_steps == 0:
|
848 |
+
step_idx = i // getattr(self.scheduler, "order", 1)
|
849 |
+
callback(step_idx, t, latents)
|
850 |
+
|
851 |
+
if not output_type == "latent":
|
852 |
+
# make sure the VAE is in float32 mode, as it overflows in float16
|
853 |
+
needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
|
854 |
+
|
855 |
+
if needs_upcasting:
|
856 |
+
self.upcast_vae()
|
857 |
+
latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
|
858 |
+
|
859 |
+
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
|
860 |
+
|
861 |
+
# cast back to fp16 if needed
|
862 |
+
if needs_upcasting:
|
863 |
+
self.vae.to(dtype=torch.float16)
|
864 |
+
else:
|
865 |
+
image = latents
|
866 |
+
|
867 |
+
if not output_type == "latent":
|
868 |
+
# apply watermark if available
|
869 |
+
if self.watermark is not None:
|
870 |
+
image = self.watermark.apply_watermark(image)
|
871 |
+
|
872 |
+
image = self.image_processor.postprocess(image, output_type=output_type)
|
873 |
+
|
874 |
+
# Offload all models
|
875 |
+
self.maybe_free_model_hooks()
|
876 |
+
|
877 |
+
if not return_dict:
|
878 |
+
return (image,)
|
879 |
+
|
880 |
+
return StableDiffusionXLPipelineOutput(images=image)
|
881 |
+
|
882 |
+
def get_glyph_attn_mask(self, texts, bboxes):
|
883 |
+
resolution = self.default_sample_size
|
884 |
+
text_idx_list = self.get_text_start_pos(texts)
|
885 |
+
mask_tensor = torch.zeros(
|
886 |
+
resolution, resolution, self.byt5_max_length,
|
887 |
+
)
|
888 |
+
for idx, bbox in enumerate(bboxes):
|
889 |
+
# box is in [x, y, w, h] format
|
890 |
+
# area of [y:y+h, x:x+w]
|
891 |
+
bbox = [int(v * resolution + 0.5) for v in bbox]
|
892 |
+
bbox[2] = max(bbox[2], 1)
|
893 |
+
bbox[3] = max(bbox[3], 1)
|
894 |
+
bbox[0: 2] = np.clip(bbox[0: 2], 0, resolution - 1).tolist()
|
895 |
+
bbox[2: 4] = np.clip(bbox[2: 4], 1, resolution).tolist()
|
896 |
+
mask_tensor[
|
897 |
+
bbox[1]: bbox[1] + bbox[3],
|
898 |
+
bbox[0]: bbox[0] + bbox[2],
|
899 |
+
text_idx_list[idx]: text_idx_list[idx + 1]
|
900 |
+
] = 1
|
901 |
+
return mask_tensor
|
902 |
+
|
903 |
+
def get_text_start_pos(self, texts):
|
904 |
+
prompt = "".encode('utf-8')
|
905 |
+
'''
|
906 |
+
Text "{text}" in {color}, {type}.
|
907 |
+
'''
|
908 |
+
pos_list = []
|
909 |
+
for text in texts:
|
910 |
+
pos_list.append(len(prompt))
|
911 |
+
text_prompt = f'Text "{text}"'
|
912 |
+
|
913 |
+
attr_list = ['0', '1']
|
914 |
+
|
915 |
+
attr_suffix = ", ".join(attr_list)
|
916 |
+
text_prompt += " in " + attr_suffix
|
917 |
+
text_prompt += ". "
|
918 |
+
text_prompt = text_prompt.encode('utf-8')
|
919 |
+
|
920 |
+
prompt = prompt + text_prompt
|
921 |
+
pos_list.append(len(prompt))
|
922 |
+
return pos_list
|
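The byte bookkeeping in get_text_start_pos works because tokens registered via tokenizer.add_tokens are encoded as single units, so each <color-*>/<font-*> attribute contributes exactly one ByT5 token position, same as a one-byte dummy character. A minimal standalone sketch of the same bookkeeping (hypothetical helper, not part of the repo):

def text_byte_spans(texts):
    # Returns [start_0, start_1, ..., total_len]; segment i of the formatted
    # prompt occupies pos[i]:pos[i + 1], exactly as get_text_start_pos computes.
    prompt = b""
    pos_list = []
    for text in texts:
        pos_list.append(len(prompt))
        # '0' and '1' stand in for the one-token <color-*>/<font-*> specials.
        prompt += f'Text "{text}" in 0, 1. '.encode('utf-8')
    pos_list.append(len(prompt))
    return pos_list

print(text_byte_spans(['Hello', 'World']))  # [0, 22, 44]

These spans are what get_glyph_attn_mask writes into the last dimension of the mask, so each bounding box attends only to the bytes of its own text.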
glyph_sdxl/modules/__init__.py
ADDED
@@ -0,0 +1,7 @@
from .simple_byt5_mapper import ByT5Mapper
from .byt5_block_byt5_mapper import T5EncoderBlockByT5Mapper

__all__ = [
    'ByT5Mapper',
    'T5EncoderBlockByT5Mapper',
]
glyph_sdxl/modules/byt5_block_byt5_mapper.py
ADDED
@@ -0,0 +1,151 @@
import torch
import torch.nn as nn
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import warnings

import logging
from torch import Tensor
from diffusers import ModelMixin
from transformers.models.t5.modeling_t5 import T5LayerSelfAttention, T5LayerFF, T5LayerNorm

logger = logging.getLogger(__name__)

class T5EncoderBlock(nn.Module):
    def __init__(self, config, has_relative_attention_bias=False):
        super().__init__()
        self.layer = nn.ModuleList()
        self.layer.append(T5LayerSelfAttention(config, has_relative_attention_bias=has_relative_attention_bias))
        self.layer.append(T5LayerFF(config))

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        position_bias=None,
        layer_head_mask=None,
        output_attentions=False,
    ):
        self_attn_past_key_value, cross_attn_past_key_value = None, None

        self_attention_outputs = self.layer[0](
            hidden_states,
            attention_mask=attention_mask,
            position_bias=position_bias,
            layer_head_mask=layer_head_mask,
            past_key_value=self_attn_past_key_value,
            use_cache=False,
            output_attentions=output_attentions,
        )
        hidden_states, present_key_value_state = self_attention_outputs[:2]
        attention_outputs = self_attention_outputs[2:]  # Keep relative position weights (and attention weights, if requested)

        # clamp inf values to enable fp16 training
        if hidden_states.dtype == torch.float16:
            clamp_value = torch.where(
                torch.isinf(hidden_states).any(),
                torch.finfo(hidden_states.dtype).max - 1000,
                torch.finfo(hidden_states.dtype).max,
            )
            hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

        # Apply Feed Forward layer
        hidden_states = self.layer[-1](hidden_states)

        # clamp inf values to enable fp16 training
        if hidden_states.dtype == torch.float16:
            clamp_value = torch.where(
                torch.isinf(hidden_states).any(),
                torch.finfo(hidden_states.dtype).max - 1000,
                torch.finfo(hidden_states.dtype).max,
            )
            hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

        outputs = (hidden_states,) + attention_outputs

        return outputs  # hidden-states, (self-attention position bias), (self-attention weights)

class T5EncoderBlockByT5Mapper(ModelMixin):
    def __init__(self, byt5_config, num_layers, sdxl_channels=None):
        super().__init__()
        if num_layers > 0:
            self.blocks = nn.ModuleList(
                [
                    T5EncoderBlock(
                        byt5_config,
                        has_relative_attention_bias=bool(i == 0))
                    for i in range(num_layers)
                ]
            )
        else:
            self.blocks = None
        self.layer_norm = T5LayerNorm(byt5_config.d_model, eps=byt5_config.layer_norm_epsilon)
        if sdxl_channels is not None:
            self.channel_mapper = nn.Linear(byt5_config.d_model, sdxl_channels)
            self.final_layer_norm = T5LayerNorm(sdxl_channels, eps=byt5_config.layer_norm_epsilon)
        else:
            self.channel_mapper = None
            self.final_layer_norm = None

    def get_extended_attention_mask(
        self, attention_mask: Tensor, input_shape: Tuple[int], device: torch.device = None, dtype: torch.float = None
    ) -> Tensor:
        """
        Makes broadcastable attention and causal masks so that future and masked tokens are ignored.

        Arguments:
            attention_mask (`torch.Tensor`):
                Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
            input_shape (`Tuple[int]`):
                The shape of the input to the model.

        Returns:
            `torch.Tensor` The extended attention mask, with the same dtype as `attention_mask.dtype`.
        """
        if dtype is None:
            dtype = self.dtype

        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
        # ourselves in which case we just need to make it broadcastable to all heads.
        if attention_mask.dim() == 3:
            extended_attention_mask = attention_mask[:, None, :, :]
        elif attention_mask.dim() == 2:
            # Provided a padding mask of dimensions [batch_size, seq_length]
            # - if the model is a decoder, apply a causal mask in addition to the padding mask
            # - if the model is an encoder, make the mask broadcastable to [batch_size, num_heads, seq_length, seq_length]
            extended_attention_mask = attention_mask[:, None, None, :]
        else:
            raise ValueError(
                f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
            )

        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
        # masked positions, this operation will create a tensor which is 0.0 for
        # positions we want to attend and the dtype's smallest value for masked positions.
        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
        extended_attention_mask = extended_attention_mask.to(dtype=dtype)  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min
        return extended_attention_mask

    def forward(self, inputs_embeds, attention_mask):
        input_shape = inputs_embeds.size()[:-1]
        extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape)

        hidden_states = inputs_embeds
        position_bias = None

        if self.blocks is not None:
            for layer_module in self.blocks:
                layer_outputs = layer_module(
                    hidden_states,
                    attention_mask=extended_attention_mask,
                    position_bias=position_bias,
                )
                hidden_states, position_bias = layer_outputs
        hidden_states = self.layer_norm(hidden_states)
        if self.channel_mapper is not None:
            hidden_states = self.channel_mapper(hidden_states)
            hidden_states = self.final_layer_norm(hidden_states)
        return hidden_states
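A shape-check sketch of this mapper (the layer count and the 2048-d SDXL width are illustrative assumptions, not values read from the configs; google/byt5-small ships a T5Config with d_model = 1472):

import torch
from transformers import T5Config

byt5_config = T5Config.from_pretrained('google/byt5-small')
mapper = T5EncoderBlockByT5Mapper(byt5_config, num_layers=2, sdxl_channels=2048)

inputs_embeds = torch.randn(1, 512, byt5_config.d_model)  # ByT5 encoder output
attention_mask = torch.ones(1, 512, dtype=torch.long)     # no padding
print(mapper(inputs_embeds, attention_mask).shape)        # torch.Size([1, 512, 2048])

Only the first block receives a relative attention bias; it is then threaded through the remaining blocks via position_bias, mirroring how the T5 encoder stack shares it.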
glyph_sdxl/modules/simple_byt5_mapper.py
ADDED
@@ -0,0 +1,16 @@
from diffusers import ModelMixin
import torch.nn as nn

class ByT5Mapper(ModelMixin):
    def __init__(self, byt5_output_dim, sdxl_text_dim):
        super().__init__()
        self.mapper = nn.Sequential(
            nn.LayerNorm(byt5_output_dim),
            nn.Linear(byt5_output_dim, sdxl_text_dim),
            nn.ReLU(),
            nn.Linear(sdxl_text_dim, sdxl_text_dim)
        )

    def forward(self, byt5_embedding):
        return self.mapper(byt5_embedding)
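For shape intuition, a minimal sketch (1472 is ByT5-small's hidden size; 2048 for the SDXL text width is an illustrative assumption):

import torch

mapper = ByT5Mapper(byt5_output_dim=1472, sdxl_text_dim=2048)
byt5_embedding = torch.randn(1, 512, 1472)   # (batch, byt5_max_length, d_model)
print(mapper(byt5_embedding).shape)          # torch.Size([1, 512, 2048])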
glyph_sdxl/utils/__init__.py
ADDED
@@ -0,0 +1,23 @@
from .parse_config import parse_config
from .constants import (
    UNET_CKPT_NAME,
    BYT5_CKPT_NAME,
    BYT5_MAPPER_CKPT_NAME,
    INSERTED_ATTN_CKPT_NAME,
    huggingface_cache_dir,
)
from .load_pretrained_byt5 import load_byt5_and_byt5_tokenizer
from .format_prompt import PromptFormat, MultilingualPromptFormat

__all__ = [
    'parse_config',
    'UNET_CKPT_NAME',
    'BYT5_CKPT_NAME',
    'BYT5_MAPPER_CKPT_NAME',
    'huggingface_cache_dir',
    'load_byt5_and_byt5_tokenizer',
    'INSERTED_ATTN_CKPT_NAME',
    'PromptFormat',
    'MultilingualPromptFormat',
]
glyph_sdxl/utils/constants.py
ADDED
@@ -0,0 +1,5 @@
UNET_CKPT_NAME = "unet_lora.pt"
INSERTED_ATTN_CKPT_NAME = "unet_inserted_attn.pt"
BYT5_CKPT_NAME = "byt5_model.pt"
BYT5_MAPPER_CKPT_NAME = "byt5_mapper.pt"
huggingface_cache_dir = None
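These names match the weight files shipped under checkpoints/glyph-sdxl/ in this Space, so the loaders can resolve each component by constant.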
glyph_sdxl/utils/format_prompt.py
ADDED
@@ -0,0 +1,113 @@
import json
import webcolors


def closest_color(requested_color):
    min_colors = {}
    for key, name in webcolors.CSS3_HEX_TO_NAMES.items():
        r_c, g_c, b_c = webcolors.hex_to_rgb(key)
        rd = (r_c - requested_color[0]) ** 2
        gd = (g_c - requested_color[1]) ** 2
        bd = (b_c - requested_color[2]) ** 2
        min_colors[(rd + gd + bd)] = name
    return min_colors[min(min_colors.keys())]

def convert_rgb_to_names(rgb_tuple):
    try:
        color_name = webcolors.rgb_to_name(rgb_tuple)
    except ValueError:
        # no exact CSS3 name: fall back to the nearest named color
        color_name = closest_color(rgb_tuple)
    return color_name

class PromptFormat():
    def __init__(
        self,
        font_path: str = 'assets/font_idx_512.json',
        color_path: str = 'assets/color_idx.json',
    ):
        with open(font_path, 'r') as f:
            self.font_dict = json.load(f)
        with open(color_path, 'r') as f:
            self.color_dict = json.load(f)

    def format_checker(self, texts, styles):
        assert len(texts) == len(styles), 'length of texts must be equal to length of styles'
        for style in styles:
            assert style['font-family'] in self.font_dict, f"invalid font-family: {style['font-family']}"
            rgb_color = webcolors.hex_to_rgb(style['color'])
            color_name = convert_rgb_to_names(rgb_color)
            assert color_name in self.color_dict, f"invalid color: {color_name}"

    def format_prompt(self, texts, styles):
        self.format_checker(texts, styles)

        prompt = ""
        '''
        Text "{text}" in {color}, {type}.
        '''
        for text, style in zip(texts, styles):
            text_prompt = f'Text "{text}"'

            attr_list = []

            # format color
            hex_color = style["color"]
            rgb_color = webcolors.hex_to_rgb(hex_color)
            color_name = convert_rgb_to_names(rgb_color)
            attr_list.append(f"<color-{self.color_dict[color_name]}>")

            # format font
            attr_list.append(f"<font-{self.font_dict[style['font-family']]}>")
            attr_suffix = ", ".join(attr_list)
            text_prompt += " in " + attr_suffix
            text_prompt += ". "

            prompt = prompt + text_prompt
        return prompt


class MultilingualPromptFormat():
    def __init__(
        self,
        font_path: str = 'assets/multilingual_cn-en_font_idx.json',
        color_path: str = 'assets/color_idx.json',
    ):
        with open(font_path, 'r') as f:
            self.font_dict = json.load(f)
        with open(color_path, 'r') as f:
            self.color_dict = json.load(f)

    def format_checker(self, texts, styles):
        assert len(texts) == len(styles), 'length of texts must be equal to length of styles'
        for style in styles:
            assert style['font-family'] in self.font_dict, f"invalid font-family: {style['font-family']}"
            rgb_color = webcolors.hex_to_rgb(style['color'])
            color_name = convert_rgb_to_names(rgb_color)
            assert color_name in self.color_dict, f"invalid color: {color_name}"

    def format_prompt(self, texts, styles):
        self.format_checker(texts, styles)

        prompt = ""
        '''
        Text "{text}" in {color}, {type}.
        '''
        for text, style in zip(texts, styles):
            text_prompt = f'Text "{text}"'

            attr_list = []

            # format color
            hex_color = style["color"]
            rgb_color = webcolors.hex_to_rgb(hex_color)
            color_name = convert_rgb_to_names(rgb_color)
            attr_list.append(f"<color-{self.color_dict[color_name]}>")

            # format font: multilingual font tokens carry a language prefix,
            # e.g. <cn-font-*> or <en-font-*>
            attr_list.append(f"<{style['font-family'][:2]}-font-{self.font_dict[style['font-family']]}>")
            attr_suffix = ", ".join(attr_list)
            text_prompt += " in " + attr_suffix
            text_prompt += ". "

            prompt = prompt + text_prompt
        return prompt
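A usage sketch, assuming 'red' is among the indexed colors and the font key exists in font_idx_512.json (both the font name and the printed indices below are hypothetical):

fmt = PromptFormat()
texts = ['HAPPY EASTER']
styles = [{'color': '#FF0000', 'font-family': 'Gagalin-Regular'}]  # hypothetical font key
print(fmt.format_prompt(texts, styles))
# e.g. Text "HAPPY EASTER" in <color-5>, <font-42>. 

Note that the emitted segments have exactly the 'Text "…" in <attr>, <attr>. ' shape that get_text_start_pos in the pipeline assumes when it recomputes byte offsets.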
glyph_sdxl/utils/load_pretrained_byt5.py
ADDED
@@ -0,0 +1,60 @@
import json

from transformers import AutoTokenizer, T5ForConditionalGeneration
from diffusers.utils import logging

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

def add_special_token(tokenizer, text_encoder, add_color, add_font, color_ann_path, font_ann_path, multilingual=False):
    with open(font_ann_path, 'r') as f:
        idx_font_dict = json.load(f)
    with open(color_ann_path, 'r') as f:
        idx_color_dict = json.load(f)

    if multilingual:
        font_token = []
        for font_code in idx_font_dict:
            prefix = font_code[:2]
            font_token.append(f'<{prefix}-font-{idx_font_dict[font_code]}>')
    else:
        font_token = [f'<font-{i}>' for i in range(len(idx_font_dict))]
    color_token = [f'<color-{i}>' for i in range(len(idx_color_dict))]
    additional_special_tokens = []
    if add_color:
        additional_special_tokens += color_token
    if add_font:
        additional_special_tokens += font_token
    tokenizer.add_tokens(additional_special_tokens, special_tokens=True)
    text_encoder.resize_token_embeddings(len(tokenizer))

def load_byt5_and_byt5_tokenizer(
    byt5_name='google/byt5-small',
    special_token=False,
    color_special_token=False,
    font_special_token=False,
    color_ann_path='assets/color_idx.json',
    font_ann_path='assets/font_idx_512.json',
    huggingface_cache_dir=None,
    multilingual=False,
):
    byt5_tokenizer = AutoTokenizer.from_pretrained(
        byt5_name, cache_dir=huggingface_cache_dir,
    )
    byt5_text_encoder = T5ForConditionalGeneration.from_pretrained(
        byt5_name, cache_dir=huggingface_cache_dir,
    ).get_encoder()

    if special_token:
        add_special_token(
            byt5_tokenizer,
            byt5_text_encoder,
            add_color=color_special_token,
            add_font=font_special_token,
            color_ann_path=color_ann_path,
            font_ann_path=font_ann_path,
            multilingual=multilingual,
        )

    logger.info('Loaded original byt5 weights')

    return byt5_text_encoder, byt5_tokenizer
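For example, loading the encoder with the color and font special tokens registered (these argument values are the defaults shipped with this repo; pass multilingual=True together with assets/multilingual_cn-en_font_idx.json for the cn-en variant):

byt5_text_encoder, byt5_tokenizer = load_byt5_and_byt5_tokenizer(
    byt5_name='google/byt5-small',
    special_token=True,
    color_special_token=True,
    font_special_token=True,
    color_ann_path='assets/color_idx.json',
    font_ann_path='assets/font_idx_512.json',
)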
glyph_sdxl/utils/parse_config.py
ADDED
@@ -0,0 +1,17 @@
import argparse
import os
import os.path as osp
from mmengine.config import Config


def parse_config(path=None):
    if path is None:
        parser = argparse.ArgumentParser()
        parser.add_argument('config_dir', type=str)
        args = parser.parse_args()
        path = args.config_dir
    config = Config.fromfile(path)

    config.config_dir = path

    return config
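Either pass a config path directly or rely on the CLI fallback; the path below is illustrative:

config = parse_config('configs/glyph_sdxl.py')
print(config.config_dir)  # 'configs/glyph_sdxl.py'
# equivalently: `python some_script.py configs/glyph_sdxl.py` then parse_config()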
requirements.txt
ADDED
@@ -0,0 +1,10 @@
transformers==4.36.2
diffusers==0.26.1
mmengine
accelerate
torch==2.2.0
torchvision==0.17.0
deepspeed
peft
webcolors
gradio
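With these pins in place, the demo environment can be reproduced with pip install -r requirements.txt.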