{
  "_class_name": "AutoencoderDC",
  "_diffusers_version": "0.34.0.dev0",
  "_name_or_path": "/Users/davidberenstein/.cache/huggingface/hub/models--Efficient-Large-Model--Sana_Sprint_0.6B_1024px_diffusers/snapshots/a7d9fc31dd5c3f5e22dbfd78360777ceed56ae97/vae",
  "attention_head_dim": 32,
  "decoder_act_fns": "silu",
  "decoder_block_out_channels": [
    128,
    256,
    512,
    512,
    1024,
    1024
  ],
  "decoder_block_types": [
    "ResBlock",
    "ResBlock",
    "ResBlock",
    "EfficientViTBlock",
    "EfficientViTBlock",
    "EfficientViTBlock"
  ],
  "decoder_layers_per_block": [
    3,
    3,
    3,
    3,
    3,
    3
  ],
  "decoder_norm_types": "rms_norm",
  "decoder_qkv_multiscales": [
    [],
    [],
    [],
    [
      5
    ],
    [
      5
    ],
    [
      5
    ]
  ],
  "downsample_block_type": "Conv",
  "encoder_block_out_channels": [
    128,
    256,
    512,
    512,
    1024,
    1024
  ],
  "encoder_block_types": [
    "ResBlock",
    "ResBlock",
    "ResBlock",
    "EfficientViTBlock",
    "EfficientViTBlock",
    "EfficientViTBlock"
  ],
  "encoder_layers_per_block": [
    2,
    2,
    2,
    3,
    3,
    3
  ],
  "encoder_qkv_multiscales": [
    [],
    [],
    [],
    [
      5
    ],
    [
      5
    ],
    [
      5
    ]
  ],
  "in_channels": 3,
  "latent_channels": 32,
  "scaling_factor": 0.41407,
  "upsample_block_type": "interpolate"
}