{
  "trainer": {
    "trainer_class": "BatchTopKCrossCoderTrainer",
    "dict_class": "BatchTopKCrossCoder",
    "lr": 0.0001,
    "steps": 48828,
    "auxk_alpha": 0.03125,
    "warmup_steps": 1000,
    "decay_start": null,
    "threshold_beta": 0.999,
    "threshold_start_step": 1000,
    "top_k_aux": 1792,
    "seed": null,
    "activation_dim": 3584,
    "dict_size": 114688,
    "k": 100,
    "code_normalization": "CROSSCODER",
    "code_normalization_alpha_sae": 1.0,
    "code_normalization_alpha_cc": 0.1,
    "device": "cuda",
    "layer": 20,
    "lm_name": "princeton-nlp/gemma-2-9b-it-DPO-gemma-2-9b",
    "wandb_name": "gemma-2-9b-L20-k100-lr1e-04-base-dpo-Crosscoder",
    "submodule_name": null,
    "dict_class_kwargs": {
      "same_init_for_all_layers": false,
      "norm_init_scale": 0.005,
      "init_with_transpose": false,
      "encoder_layers": null,
      "code_normalization": "crosscoder",
      "code_normalization_alpha_sae": 1.0,
      "code_normalization_alpha_cc": 0.1
    }
  }
}
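For reference, a minimal Python sketch of loading this config and sanity-checking the quantities it implies. The `config.json` path is an assumption, and the comments on what `k` and `top_k_aux` control reflect the usual BatchTopK trainer setup rather than anything stated in the config itself; the arithmetic follows directly from the values above.

```python
import json

# Load the trainer config shown above (file name "config.json" is an assumption).
with open("config.json") as f:
    cfg = json.load(f)["trainer"]

activation_dim = cfg["activation_dim"]  # 3584, the residual-stream width at layer 20
dict_size = cfg["dict_size"]            # 114688 crosscoder latents

# Dictionary is 32x overcomplete: 114688 / 3584 = 32.
expansion_factor = dict_size // activation_dim
print(f"expansion factor: {expansion_factor}")

# Sparsity settings: k latents kept per token by the BatchTopK activation;
# top_k_aux (= activation_dim / 2 here) is used by the auxiliary loss
# weighted by auxk_alpha.
print(f"k = {cfg['k']}, top_k_aux = {cfg['top_k_aux']}, auxk_alpha = {cfg['auxk_alpha']}")

# LR schedule: warmup over the first 1000 steps, no decay (decay_start is null).
print(f"lr = {cfg['lr']}, warmup_steps = {cfg['warmup_steps']}, steps = {cfg['steps']}")
```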