{
"architectures": [
"Gemma3ForConditionalGeneration"
],
"boi_token_index": 255999,
"bos_token_id": 2,
"eoi_token_index": 256000,
"eos_token_id": 1,
"image_token_index": 262144,
"initializer_range": 0.02,
"mm_tokens_per_image": 256,
"model_type": "gemma3",
"pad_token_id": 0,
"text_config": {
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": null,
"cache_implementation": "hybrid",
"final_logit_softcapping": null,
"head_dim": 128,
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 5376,
"initializer_range": 0.02,
"intermediate_size": 21504,
"max_position_embeddings": 131072,
"model_type": "gemma3_text",
"num_attention_heads": 32,
"num_hidden_layers": 62,
"num_key_value_heads": 16,
"query_pre_attn_scalar": 168,
"rms_norm_eps": 1e-06,
"rope_local_base_freq": 10000.0,
"rope_scaling": {
"factor": 8.0,
"rope_type": "linear"
},
"rope_theta": 1000000.0,
"sliding_window": 1024,
"sliding_window_pattern": 6,
"torch_dtype": "bfloat16",
"use_cache": true,
"vocab_size": 262208
},
"torch_dtype": "bfloat16",
"transformers_version": "4.51.0",
"unsloth_fixed": true,
"vision_config": {
"attention_dropout": 0.0,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_size": 896,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 27,
"patch_size": 14,
"torch_dtype": "bfloat16",
"vision_use_head": false
}
}