{
  "architectures": [
    "TpttModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_tptt.TpttConfig",
    "AutoModelForCausalLM": "modeling_tptt.TpttModel"
  },
  "base_model_name": "meta-llama/Llama-3.2-1B",
  "base_model_subfolder": null,
  "base_scale_attn": null,
  "bidirectional": false,
  "cross_gate": false,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "linear_precision": "bfloat16",
  "lora_config": {
    "alpha_pattern": {},
    "auto_mapping": null,
    "base_model_name_or_path": null,
    "bias": "none",
    "corda_config": null,
    "eva_config": null,
    "exclude_modules": null,
    "fan_in_fan_out": false,
    "inference_mode": false,
    "init_lora_weights": true,
    "layer_replication": null,
    "layers_pattern": null,
    "layers_to_transform": null,
    "loftq_config": {},
    "lora_alpha": 16,
    "lora_bias": false,
    "lora_dropout": 0.05,
    "megatron_config": null,
    "megatron_core": "megatron.core",
    "modules_to_save": null,
    "peft_type": "LORA",
    "r": 8,
    "rank_pattern": {},
    "revision": null,
    "target_modules": [
      "k_proj",
      "v_proj",
      "o_proj",
      "q_proj"
    ],
    "task_type": "CAUSAL_LM",
    "trainable_token_indices": null,
    "use_dora": false,
    "use_rslora": false
  },
  "mag_weight": 0.5,
  "max_chunk_size": 64,
  "max_position_embeddings": 131072,
  "max_self_attn_length": null,
  "mlp_bias": false,
  "model_type": "tptt",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "operator_mode": "delta_product_r",
  "padding_side": "right",
  "pooling_config": null,
  "pretraining_tp": 1,
  "recurrent_config": {
    "gate_type": "k",
    "linear": true,
    "order": 2,
    "trick": "rotative"
  },
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "target_modules_names": [
    "attn",
    "self_attn",
    "attention"
  ],
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "use_cache": true,
  "use_linear_checkpoint": false,
  "vocab_size": 128256
}
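
A minimal loading sketch, assuming the Hugging Face transformers Python API: because "auto_map" points at the custom configuration_tptt.TpttConfig and modeling_tptt.TpttModel classes shipped with the repository, trust_remote_code=True is needed. The checkpoint identifier below is a placeholder, not taken from this config.

# Minimal sketch (assumed usage, not from the config itself).
import torch
from transformers import AutoConfig, AutoModelForCausalLM

checkpoint = "path/or/repo-id"  # hypothetical; replace with the repo id or local folder holding this config.json

# trust_remote_code=True lets transformers import the TpttConfig / TpttModel
# classes referenced in "auto_map" (configuration_tptt.py / modeling_tptt.py).
config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in the config
)
print(config.model_type)  # "tptt"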