{
  "mulan": {
    "sr": 24000,
    "clip_secs": 10,
    "dim_latent": 512,
    "decoupled_contrastive_learning": true,
    "hierarchical_contrastive_loss": false,
    "hierarchical_contrastive_loss_layers": null,
    "sigmoid_contrastive_loss": false,
    "rank_contrast": true
  },
  "audio_model": {
    "name": "OpenMuQ/MuQ-large-msd-iter",
    "model_dim": 1024,
    "use_layer_idx": -1
  },
  "text_model": {
    "name": "xlm-roberta-base",
    "model_dim": null,
    "use_layer_idx": -1
  },
  "audio_transformer": {
    "dim": 768,
    "tf_depth": 0,
    "heads": 8,
    "dim_head": 64,
    "attn_dropout": 0,
    "ff_dropout": 0,
    "ff_mult": 4
  },
  "text_transformer": {
    "dim": 768,
    "tf_depth": 8,
    "max_seq_len": 1024,
    "dim_head": 64,
    "heads": 8,
    "attn_dropout": 0,
    "ff_dropout": 0,
    "ff_mult": 4
  }
}