# afv03-lora / rope-15 / rope_migration.yml
# Uploaded by p1atdev ("Upload rope_migration.yml", commit 587f3dd, verified; 1.67 kB)
# NOTE(review): lines above were scraped Hugging Face UI chrome; converted to
# comments so the file parses as YAML.
---
# Base model and denoiser configuration.
model:
  # Local quantized checkpoint (bitsandbytes NF4) of AuraFlow v0.3.
  checkpoint_path: "./models/aura_flow_0.3.bnb_nf4.safetensors"
  pretrained_model_name_or_path: fal/AuraFlow-v0.3
  dtype: bfloat16
  denoiser:
    use_flash_attn: true
    # Canonical lowercase boolean (was "True"; yamllint `truthy`).
    use_rope: true
    rope_theta: 10000
    # Per-axis RoPE dimension split (sums to the head dim).
    rope_dim_sizes: [32, 112, 112]
  # NOTE(review): original indentation was lost — the loss flags below are
  # assumed to be model-level siblings of `denoiser`; confirm against the
  # trainer's config schema.
  noise_prediction_loss: true
  migration_loss: true
  prior_preservation_loss: false
  # Threshold below which the migration loss is considered converged/frozen.
  migration_freezing_threshold: 1.0e-7
# PEFT adapter configuration (LoRA).
peft:
  type: lora
  rank: 4
  alpha: 1.0
  dropout: 0.0
  dtype: bfloat16
  # Module-name substrings to wrap with LoRA.
  include_keys:
    - ".mlp."
    - ".attn."
  # Module-name substrings / patterns to skip.
  exclude_keys:
    - "text_encoder"
    - "vae"
    - "t_embedder"
    - "final_linear"
    # Exclude modulation layers (modC, modCX, modX). Single-quoted so the
    # backslash in the pattern stays literal.
    - regex: '.*\.mod[CX]{1,2}'
# Training dataset and aspect-ratio bucketing.
dataset:
  folder: "datasets/pexels-50k"
  num_repeats: 1
  batch_size: 2
  # Bucketing: base resolution 1024, bucket sizes stepped by 128, nothing
  # below 384 on a side; images smaller than a bucket are not upscaled.
  bucket_base_size: 1024
  step: 128
  min_size: 384
  do_upscale: false
  # Empty list written explicitly (a bare `key:` would parse as null).
  caption_processors: []
# Optimizer: schedule-free RAdam (no external LR scheduler needed).
optimizer:
  name: "schedulefree.RAdamScheduleFree"
  args:
    lr: 0.05
# Experiment tracking.
tracker:
  project_name: "auraflow-rope-1"
  loggers:
    - wandb
# Checkpoint saving.
saving:
  strategy:
    per_epochs: 1
    per_steps: 500
    # save_last: true
  callbacks:
    # Push checkpoints to the Hugging Face Hub.
    # NOTE(review): original inline comment read `# or "hf_hub" to push to
    # hub` next to a type that already was "hf_hub" — stale copy of a
    # template; removed the contradiction.
    - type: "hf_hub"
      name: "rope-15"
      save_dir: "./output/rope-15"
      hub_id: "p1atdev/afv03-lora"
      dir_in_repo: "rope-15"
# Periodic preview generation during training.
preview:
  strategy:
    per_epochs: 1
    per_steps: 100
  callbacks:
    # Post previews to a Discord webhook (URL redacted in this upload).
    - type: "discord"
      url: "masked"
  # NOTE(review): original indentation was lost — `data` is assumed to be a
  # preview-level sibling of `callbacks` pointing at the prompt spec; confirm
  # against the trainer's config schema.
  data:
    path: "./projects/rope/preview.yml"
# Global RNG seed and epoch count — presumably read by the trainer at
# top level; TODO(review) confirm against the config schema.
seed: 42
num_train_epochs: 10
# Trainer runtime options.
trainer:
  # debug_mode: "1step"
  gradient_checkpointing: true
  # Effective batch = batch_size (2) x accumulation (8) = 16.
  gradient_accumulation_steps: 8
  torch_compile: true
  torch_compile_args:
    mode: max-autotune
    fullgraph: true
  # NOTE(review): assumed trainer-level (maps to
  # torch.set_float32_matmul_precision), not a torch.compile argument —
  # confirm against the trainer's config schema.
  fp32_matmul_precision: "medium"