| { | |
| "head_dim": 128, | |
| "metadata": { | |
| "dataset": "kmfoda/booksum", | |
| "model": "Qwen/Qwen3-8B", | |
| "n_future_positions": 100, | |
| "n_samples": 100, | |
| "n_sink": 4 | |
| }, | |
| "num_heads": 32, | |
| "num_layers": 36 | |
| } |
| { | |
| "head_dim": 128, | |
| "metadata": { | |
| "dataset": "kmfoda/booksum", | |
| "model": "Qwen/Qwen3-8B", | |
| "n_future_positions": 100, | |
| "n_samples": 100, | |
| "n_sink": 4 | |
| }, | |
| "num_heads": 32, | |
| "num_layers": 36 | |
| } |