{
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
  "adam_weight_decay": 0.01,
  "amp": false,
  "class_conditional": false,
  "class_unconditional_prob": 0.1,
  "clip_grad_norm": 1.0,
  "dataset_name": "roc",
  "dim_ae": 64,
  "disable_dropout": false,
  "dropout": 0.1,
  "ema_decay": 0.9999,
  "ema_update_every": 1,
  "enc_dec_model": "facebook/bart-base",
  "eval": false,
  "eval_batch_size": 32,
  "eval_every": 1000,
  "eval_test": false,
  "gradient_accumulation_steps": 1,
  "init_path": null,
  "l2_normalize_latents": true,
  "latent_dim": 64,
  "latent_model_path": "saved_latent_models/roc/2024-11-24_09-55-03",
  "learning_rate": 0.0001,
  "lm_mode": "freeze",
  "loss_type": "l2",
  "lr_schedule": "linear",
  "lr_warmup_steps": 1000,
  "max_seq_len": 64,
  "mixed_precision": "no",
  "normalize_latent": false,
  "num_decoder_latents": 32,
  "num_dense_connections": 3,
  "num_devices": 1,
  "num_encoder_latents": 32,
  "num_layers": 3,
  "num_samples": 1000,
  "num_train_steps": 50000,
  "objective": "pred_v",
  "optimizer": "adamw",
  "output_dir": "saved_latent_models/roc/2024-11-24_09-55-03",
  "resume_dir": null,
  "resume_training": false,
  "sampler": "ddpm",
  "sampling_schedule": null,
  "sampling_timesteps": 250,
  "save_and_sample_every": 5000,
  "save_dir": "saved_latent_models",
  "scale": 1.0,
  "scale_shift": true,
  "self_condition": true,
  "seq2seq_candidates": 5,
  "seq2seq_unconditional_prob": 0.1,
  "train_batch_size": 256,
  "train_prob_self_cond": 0.5,
  "train_schedule": "cosine",
  "trainable_params": 187928960,
  "tx_depth": 12,
  "tx_dim": 768,
  "wandb_name": "bart-roc-l2norm-test-32-64"
}