Nirvana-pro / nirvana_1_3B.json
YuhuaJiang's picture
initial upload
b510dde verified
raw
history blame contribute delete
825 Bytes
{
  "attention_bias": false,
  "auto_map": {
    "AutoConfig": "configuration_transformer_rnn.TransformerConfig_rnn",
    "AutoModel": "modeling_transformer_rnn.TransformerModel_rnn",
    "AutoModelForCausalLM": "modeling_transformer_rnn.TransformerForCausalLM_rnn"
  },
  "bos_token_id": 1,
  "concept_dim": 64,
  "eos_token_id": 2,
  "fuse_cross_entropy": true,
  "fuse_norm": true,
  "hidden_act": "swish",
  "hidden_size": 2048,
  "initializer_range": 6e-3,
  "logit_dim": 32,
  "max_position_embeddings": 32768,
  "model_type": "transformer_rnn",
  "norm_eps": 1e-06,
  "num_heads": 16,
  "num_hidden_layers": 16,
  "pad_token_id": 2,
  "rope_theta": 10000.0,
  "tie_word_embeddings": true,
  "use_cache": false,
  "vocab_size": 128512,
  "window_size": 2048
}