DiffusionLM-ROCStories / config.json
michaelsyao's picture
Push model using huggingface_hub.
c7a8b44 verified
{
"adam_beta1": 0.9,
"adam_beta2": 0.999,
"adam_weight_decay": 0.01,
"amp": false,
"class_conditional": false,
"class_unconditional_prob": 0.1,
"clip_grad_norm": 1.0,
"dataset_name": "roc",
"dim_ae": 64,
"disable_dropout": false,
"dropout": 0.1,
"ema_decay": 0.9999,
"ema_update_every": 1,
"enc_dec_model": "facebook/bart-base",
"eval": false,
"eval_batch_size": 32,
"eval_every": 1000,
"eval_test": false,
"gradient_accumulation_steps": 1,
"init_path": null,
"l2_normalize_latents": true,
"latent_dim": 64,
"latent_model_path": "saved_latent_models/roc/2024-11-24_09-55-03",
"learning_rate": 0.0001,
"lm_mode": "freeze",
"loss_type": "l2",
"lr_schedule": "linear",
"lr_warmup_steps": 1000,
"max_seq_len": 64,
"mixed_precision": "no",
"normalize_latent": false,
"num_decoder_latents": 32,
"num_dense_connections": 3,
"num_devices": 1,
"num_encoder_latents": 32,
"num_layers": 3,
"num_samples": 1000,
"num_train_steps": 50000,
"objective": "pred_v",
"optimizer": "adamw",
"output_dir": "saved_latent_models/roc/2024-11-24_09-55-03",
"resume_dir": null,
"resume_training": false,
"sampler": "ddpm",
"sampling_schedule": null,
"sampling_timesteps": 250,
"save_and_sample_every": 5000,
"save_dir": "saved_latent_models",
"scale": 1.0,
"scale_shift": true,
"self_condition": true,
"seq2seq_candidates": 5,
"seq2seq_unconditional_prob": 0.1,
"train_batch_size": 256,
"train_prob_self_cond": 0.5,
"train_schedule": "cosine",
"trainable_params": 187928960,
"tx_depth": 12,
"tx_dim": 768,
"wandb_name": "bart-roc-l2norm-test-32-64"
}