vae-lyra-sdxl-t5xl / config.json
AbstractPhil's picture
Config update at step 7814
fa4896d verified
{
  "modality_dims": {
    "clip_l": 768,
    "clip_g": 1280,
    "t5_xl": 2048
  },
  "latent_dim": 2048,
  "seq_len": 77,
  "encoder_layers": 3,
  "decoder_layers": 3,
  "hidden_dim": 1024,
  "dropout": 0.1,
  "fusion_strategy": "cantor",
  "fusion_heads": 8,
  "fusion_dropout": 0.1,
  "beta_kl": 0.1,
  "beta_reconstruction": 1.0,
  "beta_cross_modal": 0.1,
  "recon_type": "mse",
  "modality_recon_weights": {
    "clip_l": 1.0,
    "clip_g": 1.0,
    "t5_xl": 0.3
  },
  "cross_modal_projection_dim": 768,
  "use_kl_annealing": true,
  "kl_anneal_epochs": 5,
  "kl_start_beta": 0.0,
  "batch_size": 64,
  "num_epochs": 50,
  "learning_rate": 0.0001,
  "weight_decay": 1e-05,
  "gradient_clip": 1.0,
  "use_scheduler": true,
  "scheduler_type": "cosine",
  "num_samples": 50000,
  "synthetic_ratio": 0.05,
  "checkpoint_dir": "./checkpoints_lyra_sdxl_t5xl",
  "save_every": 2500,
  "keep_last_n": 3,
  "hf_repo": "AbstractPhil/vae-lyra-sdxl-t5xl",
  "push_to_hub": true,
  "push_every": 5000,
  "auto_load_from_hub": true,
  "use_wandb": false,
  "wandb_project": "vae-lyra-sdxl-t5xl",
  "wandb_entity": null,
  "log_every": 50,
  "device": "cuda",
  "mixed_precision": true,
  "seed": 42,
  "num_workers": 0
}