{ "module": "keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone", "class_name": "StableDiffusion3Backbone", "config": { "name": "stable_diffusion_3_backbone", "trainable": true, "mmdit_patch_size": 2, "mmdit_hidden_dim": 1536, "mmdit_num_layers": 24, "mmdit_num_heads": 24, "mmdit_position_size": 192, "vae": { "module": "keras_hub.src.models.vae.vae_backbone", "class_name": "VAEBackbone", "config": { "name": "vae", "trainable": true, "encoder_num_filters": [ 128, 256, 512, 512 ], "encoder_num_blocks": [ 2, 2, 2, 2 ], "decoder_num_filters": [ 512, 512, 256, 128 ], "decoder_num_blocks": [ 3, 3, 3, 3 ], "sampler_method": "sample", "input_channels": 3, "sample_channels": 32, "output_channels": 3, "scale": 1.5305, "shift": 0.0609 }, "registered_name": "VAEBackbone" }, "clip_l": { "module": "keras_hub.src.models.clip.clip_text_encoder", "class_name": "CLIPTextEncoder", "config": { "name": "clip_l", "trainable": true, "vocabulary_size": 49408, "embedding_dim": 768, "hidden_dim": 768, "num_layers": 12, "num_heads": 12, "intermediate_dim": 3072, "intermediate_activation": "quick_gelu", "intermediate_output_index": 10, "max_sequence_length": 77 }, "registered_name": "CLIPTextEncoder" }, "clip_g": { "module": "keras_hub.src.models.clip.clip_text_encoder", "class_name": "CLIPTextEncoder", "config": { "name": "clip_g", "trainable": true, "vocabulary_size": 49408, "embedding_dim": 1280, "hidden_dim": 1280, "num_layers": 32, "num_heads": 20, "intermediate_dim": 5120, "intermediate_activation": "gelu", "intermediate_output_index": 30, "max_sequence_length": 77 }, "registered_name": "CLIPTextEncoder" }, "t5": null, "latent_channels": 16, "output_channels": 3, "num_train_timesteps": 1000, "shift": 3.0, "height": 1024, "width": 1024 }, "registered_name": "keras_hub>StableDiffusion3Backbone" }