villekuosmanen's picture
Upload SAE model weights, config, and training state
541bd02 verified
raw
history blame
1.46 kB
{
"num_tokens": 77,
"token_dim": 128,
"expansion_factor": 1.25,
"activation_fn": "relu",
"use_token_sampling": true,
"fixed_tokens": [
0,
1
],
"sampling_strategy": "block_average",
"sampling_stride": 8,
"max_sampled_tokens": 200,
"block_size": 8,
"batch_size": 16,
"learning_rate": 0.0001,
"num_epochs": 20,
"validation_split": 0.1,
"l1_penalty": 0.3,
"optimizer": "adam",
"weight_decay": 1e-05,
"lr_schedule": "constant",
"warmup_epochs": 2,
"gradient_clip_norm": 1.0,
"early_stopping_patience": 10,
"early_stopping_min_delta": 1e-05,
"log_every": 5,
"save_every": 1000,
"validate_every": 500,
"device": "cuda",
"repo_id": "[villekuosmanen/drop_footbag_into_dice_tower, villekuosmanen/drop_footbag_into_dice_tower_continuous, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.0.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.1.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.2.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.3.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.4.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.5.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.6.0, villekuosmanen/eval_footbag_11Sep]",
"repo_hash": "e78b65d9",
"layer_name": "model.encoder.layers.3.norm2",
"activation_cache_path": "/home/ville/.cache/physical_ai_interpretability/sae_activations",
"experiment_name": "sae_eval_footbag_11Sep_e78b65d9"
}