# Training configuration with two top-level sections:
#   model - the class to instantiate (`target`) and its constructor arguments (`params`)
#   data  - the data-module class and its train/validation dataset settings
#
# NOTE(review): this file had lost all indentation and carried a trailing "| |"
# table residue on every line. The nesting below was reconstructed from the key
# order and the repeated `target`/`params` pairs; all keys and values are
# unchanged. Confirm the hierarchy against the upstream config before training.
model:
  base_learning_rate: 4.5e-06
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 4
    n_embed: 16384
    monitor: val/rec_loss
    # Encoder/decoder architecture settings.
    # NOTE(review): assumed to be nested under model.params - confirm.
    ddconfig:
      double_z: false
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
        - 1
        - 2
        - 2
        - 4
      num_res_blocks: 2
      attn_resolutions:
        - 32
      dropout: 0.0
    # Loss settings: a `target` class with its own `params`.
    # NOTE(review): assumed to be a sibling of ddconfig under model.params - confirm.
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_num_layers: 2
        disc_start: 1
        disc_weight: 0.6
        codebook_weight: 1.0
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 10
    num_workers: 20
    wrap: true
    # Train and validation use the same size/crop_size values.
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256