{
  "num_tokens": 77,
  "token_dim": 128,
  "expansion_factor": 1.25,
  "activation_fn": "relu",
  "use_token_sampling": true,
  "fixed_tokens": [
    0,
    1
  ],
  "sampling_strategy": "block_average",
  "sampling_stride": 8,
  "max_sampled_tokens": 200,
  "block_size": 8,
  "batch_size": 16,
  "learning_rate": 0.0001,
  "num_epochs": 20,
  "validation_split": 0.1,
  "l1_penalty": 0.3,
  "optimizer": "adam",
  "weight_decay": 1e-05,
  "lr_schedule": "constant",
  "warmup_epochs": 2,
  "gradient_clip_norm": 1.0,
  "early_stopping_patience": 10,
  "early_stopping_min_delta": 1e-05,
  "log_every": 5,
  "save_every": 1000,
  "validate_every": 500,
  "device": "cuda",
  "repo_id": "[villekuosmanen/drop_footbag_into_dice_tower, villekuosmanen/drop_footbag_into_dice_tower_continuous, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.0.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.1.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.2.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.3.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.4.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.5.0, villekuosmanen/dAgger_drop_footbag_into_dice_tower_1.6.0, villekuosmanen/eval_footbag_11Sep]",
  "repo_hash": "e78b65d9",
  "layer_name": "model.encoder.layers.3.norm2",
  "activation_cache_path": "/home/ville/.cache/physical_ai_interpretability/sae_activations",
  "experiment_name": "sae_eval_footbag_11Sep_e78b65d9"
}