File size: 4,045 Bytes
a73431a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
name: michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k
description: ''
tag: michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
seed: 0
use_timestamp: true
timestamp: ''
exp_root_dir: outputs
exp_dir: outputs/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k
trial_name: michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
trial_dir: outputs/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k/michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
n_gpus: 8
resume: ./ckpts/3DNativeGeneration/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k.ckpt
data_type: objaverse-datamodule
data:
root_dir: data/objaverse_clean/cap3d_high_quality_170k_images
data_type: occupancy
n_samples: 4096
noise_sigma: 0.0
load_supervision: false
supervision_type: occupancy
n_supervision: 10000
load_image: true
image_data_path: data/objaverse_clean/raw_data/images/cap3d_high_quality_170k
image_type: mvrgb
idx:
- 0
- 4
- 8
- 12
- 16
n_views: 4
load_caption: false
rotate_points: false
batch_size: 32
num_workers: 16
system_type: shape-diffusion-system
system:
val_samples_json: val_data/mv_images/val_samples_rgb_mvimage.json
z_scale_factor: 1.0
guidance_scale: 7.5
num_inference_steps: 50
eta: 0.0
shape_model_type: michelangelo-aligned-autoencoder
shape_model:
num_latents: 256
embed_dim: 64
point_feats: 3
out_dim: 1
num_freqs: 8
include_pi: false
heads: 12
width: 768
num_encoder_layers: 8
num_decoder_layers: 16
use_ln_post: true
init_scale: 0.25
qkv_bias: false
use_flash: true
use_checkpoint: true
condition_model_type: clip-embedder
condition_model:
pretrained_model_name_or_path: openai/clip-vit-large-patch14
encode_camera: true
camera_embeds_dim: 32
n_views: 4
empty_embeds_ratio: 0.1
normalize_embeds: false
zero_uncond_embeds: true
denoiser_model_type: simple-denoiser
denoiser_model:
input_channels: 64
output_channels: 64
n_ctx: 256
width: 768
layers: 6
heads: 12
context_dim: 1024
init_scale: 1.0
skip_ln: true
use_checkpoint: true
noise_scheduler_type: diffusers.schedulers.DDPMScheduler
noise_scheduler:
num_train_timesteps: 1000
beta_start: 0.00085
beta_end: 0.012
beta_schedule: scaled_linear
variance_type: fixed_small
clip_sample: false
denoise_scheduler_type: diffusers.schedulers.DDIMScheduler
denoise_scheduler:
num_train_timesteps: 1000
beta_start: 0.00085
beta_end: 0.012
beta_schedule: scaled_linear
clip_sample: false
set_alpha_to_one: false
steps_offset: 1
loggers:
wandb:
enable: false
project: JiangXin
name: text-to-shape-diffusion+michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k+michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
loss:
loss_type: mse
lambda_diffusion: 1.0
optimizer:
name: AdamW
args:
lr: 5.0e-05
betas:
- 0.9
- 0.99
eps: 1.0e-06
scheduler:
name: SequentialLR
interval: step
schedulers:
- name: LinearLR
interval: step
args:
start_factor: 1.0e-06
end_factor: 1.0
total_iters: 5000
- name: CosineAnnealingLR
interval: step
args:
T_max: 5000
eta_min: 0.0
milestones:
- 5000
trainer:
num_nodes: 2
max_epochs: 100000
log_every_n_steps: 5
num_sanity_val_steps: 1
check_val_every_n_epoch: 3
enable_progress_bar: true
precision: 16-mixed
strategy: ddp_find_unused_parameters_true
checkpoint:
save_last: true
save_top_k: -1
every_n_train_steps: 5000
|