{
  "activation_type": "gelu",
  "afrac": 0.25,
  "afrac_loctypes": "attn_in,attn_out,mlp_in,mlp_out,mlp_neuron,attn_v,attn_k,attn_q",
  "architectures": [
    "CircuitGPTForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "config.CircuitGPTConfig",
    "AutoModelForCausalLM": "modeling_circuitgpt.CircuitGPTForCausalLM"
  },
  "bias": true,
  "bigram_table_rank": null,
  "block_size": 1024,
  "bos_token_id": null,
  "d_head": 16,
  "d_mlp": 8192,
  "d_model": 2048,
  "d_pos_emb": 32,
  "dropout": 0.0,
  "dropout_cat_pos_emb": false,
  "enable_bigram_table": true,
  "eos_token_id": 2047,
  "flash": true,
  "is_decoder": true,
  "learnable_bigram_table": true,
  "ln_bias": true,
  "max_position_embeddings": 1024,
  "model_type": "circuitgpt",
  "n_head": 128,
  "n_layer": 8,
  "pad_token_id": null,
  "residual_activation_type": "identity",
  "rms_norm": true,
  "sink": true,
  "sinusoidal_cat_pos_emb": false,
  "tie_word_embeddings": false,
  "tied_unembed": false,
  "torch_dtype": "float32",
  "transformers_version": "4.49.0",
  "unembed_rank": null,
  "use_position_embeddings": true,
  "vocab_size": 2048
}
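
Because the `auto_map` entries point at custom classes (`config.CircuitGPTConfig` and `modeling_circuitgpt.CircuitGPTForCausalLM`), a checkpoint shipping this config would typically be loaded with `trust_remote_code=True` so that `transformers` can import those files from the repository. The sketch below is a minimal, hedged illustration of that pattern; the repo id `your-org/circuitgpt` is a placeholder, not the actual model path, and the exact usage depends on the repository providing `config.py` and `modeling_circuitgpt.py`.

```python
# Minimal sketch of loading a checkpoint that ships this config.json.
# Assumption: "your-org/circuitgpt" is a placeholder repo id.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

# trust_remote_code=True lets transformers import the custom classes
# referenced in the config's auto_map.
config = AutoConfig.from_pretrained("your-org/circuitgpt", trust_remote_code=True)
print(config.d_model, config.n_head, config.n_layer)  # expected: 2048 128 8

model = AutoModelForCausalLM.from_pretrained(
    "your-org/circuitgpt",
    trust_remote_code=True,
    torch_dtype=torch.float32,  # matches the config's "torch_dtype": "float32"
)
```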