danielhanchen committed on
Commit
20e4d42
·
verified ·
1 Parent(s): 205b67d

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +215 -171
config.json CHANGED
@@ -3,18 +3,19 @@
3
  "Gemma3ForConditionalGeneration"
4
  ],
5
  "boi_token_index": 255999,
 
 
6
  "eoi_token_index": 256000,
7
- "eos_token_id": [
8
- 1,
9
- 106
10
- ],
11
  "image_token_index": 262144,
12
  "initializer_range": 0.02,
13
  "mm_tokens_per_image": 256,
14
  "model_type": "gemma3",
 
15
  "quantization_config": {
16
  "config_groups": {
17
  "group_0": {
 
18
  "input_activations": {
19
  "actorder": null,
20
  "block_structure": null,
@@ -46,187 +47,228 @@
46
  }
47
  },
48
  "format": "float-quantized",
49
- "global_compression_ratio": 1.1950546068196743,
50
  "ignore": [
51
- "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj",
52
- "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj",
53
- "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj",
54
- "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj",
55
- "vision_tower.vision_model.encoder.layers.0.mlp.fc1",
56
- "vision_tower.vision_model.encoder.layers.0.mlp.fc2",
57
- "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj",
58
- "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj",
59
- "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj",
60
- "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj",
61
- "vision_tower.vision_model.encoder.layers.1.mlp.fc1",
62
- "vision_tower.vision_model.encoder.layers.1.mlp.fc2",
63
- "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj",
64
- "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj",
65
- "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj",
66
- "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj",
67
- "vision_tower.vision_model.encoder.layers.2.mlp.fc1",
68
- "vision_tower.vision_model.encoder.layers.2.mlp.fc2",
69
- "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj",
70
- "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj",
71
- "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj",
72
- "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj",
73
- "vision_tower.vision_model.encoder.layers.3.mlp.fc1",
74
- "vision_tower.vision_model.encoder.layers.3.mlp.fc2",
75
- "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj",
76
- "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj",
77
- "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj",
78
- "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj",
79
- "vision_tower.vision_model.encoder.layers.4.mlp.fc1",
80
- "vision_tower.vision_model.encoder.layers.4.mlp.fc2",
81
- "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj",
82
- "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj",
83
- "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj",
84
- "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj",
85
- "vision_tower.vision_model.encoder.layers.5.mlp.fc1",
86
- "vision_tower.vision_model.encoder.layers.5.mlp.fc2",
87
- "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj",
88
- "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj",
89
- "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj",
90
- "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj",
91
- "vision_tower.vision_model.encoder.layers.6.mlp.fc1",
92
- "vision_tower.vision_model.encoder.layers.6.mlp.fc2",
93
- "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj",
94
- "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj",
95
- "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj",
96
- "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj",
97
- "vision_tower.vision_model.encoder.layers.7.mlp.fc1",
98
- "vision_tower.vision_model.encoder.layers.7.mlp.fc2",
99
- "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj",
100
- "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj",
101
- "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj",
102
- "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj",
103
- "vision_tower.vision_model.encoder.layers.8.mlp.fc1",
104
- "vision_tower.vision_model.encoder.layers.8.mlp.fc2",
105
- "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj",
106
- "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj",
107
- "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj",
108
- "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj",
109
- "vision_tower.vision_model.encoder.layers.9.mlp.fc1",
110
- "vision_tower.vision_model.encoder.layers.9.mlp.fc2",
111
- "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj",
112
- "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj",
113
- "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj",
114
- "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj",
115
- "vision_tower.vision_model.encoder.layers.10.mlp.fc1",
116
- "vision_tower.vision_model.encoder.layers.10.mlp.fc2",
117
- "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj",
118
- "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj",
119
- "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj",
120
- "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj",
121
- "vision_tower.vision_model.encoder.layers.11.mlp.fc1",
122
- "vision_tower.vision_model.encoder.layers.11.mlp.fc2",
123
- "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj",
124
- "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj",
125
- "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj",
126
- "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj",
127
- "vision_tower.vision_model.encoder.layers.12.mlp.fc1",
128
- "vision_tower.vision_model.encoder.layers.12.mlp.fc2",
129
- "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj",
130
- "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj",
131
- "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj",
132
- "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj",
133
- "vision_tower.vision_model.encoder.layers.13.mlp.fc1",
134
- "vision_tower.vision_model.encoder.layers.13.mlp.fc2",
135
- "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj",
136
- "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj",
137
- "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj",
138
- "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj",
139
- "vision_tower.vision_model.encoder.layers.14.mlp.fc1",
140
- "vision_tower.vision_model.encoder.layers.14.mlp.fc2",
141
- "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj",
142
- "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj",
143
- "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj",
144
- "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj",
145
- "vision_tower.vision_model.encoder.layers.15.mlp.fc1",
146
- "vision_tower.vision_model.encoder.layers.15.mlp.fc2",
147
- "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj",
148
- "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj",
149
- "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj",
150
- "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj",
151
- "vision_tower.vision_model.encoder.layers.16.mlp.fc1",
152
- "vision_tower.vision_model.encoder.layers.16.mlp.fc2",
153
- "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj",
154
- "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj",
155
- "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj",
156
- "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj",
157
- "vision_tower.vision_model.encoder.layers.17.mlp.fc1",
158
- "vision_tower.vision_model.encoder.layers.17.mlp.fc2",
159
- "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj",
160
- "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj",
161
- "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj",
162
- "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj",
163
- "vision_tower.vision_model.encoder.layers.18.mlp.fc1",
164
- "vision_tower.vision_model.encoder.layers.18.mlp.fc2",
165
- "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj",
166
- "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj",
167
- "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj",
168
- "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj",
169
- "vision_tower.vision_model.encoder.layers.19.mlp.fc1",
170
- "vision_tower.vision_model.encoder.layers.19.mlp.fc2",
171
- "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj",
172
- "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj",
173
- "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj",
174
- "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj",
175
- "vision_tower.vision_model.encoder.layers.20.mlp.fc1",
176
- "vision_tower.vision_model.encoder.layers.20.mlp.fc2",
177
- "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj",
178
- "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj",
179
- "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj",
180
- "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj",
181
- "vision_tower.vision_model.encoder.layers.21.mlp.fc1",
182
- "vision_tower.vision_model.encoder.layers.21.mlp.fc2",
183
- "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj",
184
- "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj",
185
- "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj",
186
- "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj",
187
- "vision_tower.vision_model.encoder.layers.22.mlp.fc1",
188
- "vision_tower.vision_model.encoder.layers.22.mlp.fc2",
189
- "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj",
190
- "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj",
191
- "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj",
192
- "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj",
193
- "vision_tower.vision_model.encoder.layers.23.mlp.fc1",
194
- "vision_tower.vision_model.encoder.layers.23.mlp.fc2",
195
- "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj",
196
- "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj",
197
- "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj",
198
- "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj",
199
- "vision_tower.vision_model.encoder.layers.24.mlp.fc1",
200
- "vision_tower.vision_model.encoder.layers.24.mlp.fc2",
201
- "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj",
202
- "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj",
203
- "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj",
204
- "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj",
205
- "vision_tower.vision_model.encoder.layers.25.mlp.fc1",
206
- "vision_tower.vision_model.encoder.layers.25.mlp.fc2",
207
- "vision_tower.vision_model.encoder.layers.26.self_attn.k_proj",
208
- "vision_tower.vision_model.encoder.layers.26.self_attn.v_proj",
209
- "vision_tower.vision_model.encoder.layers.26.self_attn.q_proj",
210
- "vision_tower.vision_model.encoder.layers.26.self_attn.out_proj",
211
- "vision_tower.vision_model.encoder.layers.26.mlp.fc1",
212
- "vision_tower.vision_model.encoder.layers.26.mlp.fc2",
213
- "language_model.lm_head"
214
  ],
215
  "kv_cache_scheme": null,
216
  "quant_method": "compressed-tensors",
217
- "quantization_status": "compressed"
 
 
 
218
  },
219
  "text_config": {
 
220
  "attention_bias": false,
221
  "attention_dropout": 0.0,
222
  "attn_logit_softcapping": null,
223
  "cache_implementation": "hybrid",
 
224
  "final_logit_softcapping": null,
225
  "head_dim": 256,
226
  "hidden_activation": "gelu_pytorch_tanh",
227
  "hidden_size": 2560,
228
  "initializer_range": 0.02,
229
  "intermediate_size": 10240,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  "max_position_embeddings": 131072,
231
  "model_type": "gemma3_text",
232
  "num_attention_heads": 8,
@@ -242,13 +284,15 @@
242
  "rope_theta": 1000000.0,
243
  "sliding_window": 1024,
244
  "sliding_window_pattern": 6,
 
245
  "use_cache": true,
246
  "vocab_size": 262208
247
  },
248
- "torch_dtype": "bfloat16",
249
- "transformers_version": "4.50.0",
250
  "vision_config": {
251
  "attention_dropout": 0.0,
 
252
  "hidden_act": "gelu_pytorch_tanh",
253
  "hidden_size": 1152,
254
  "image_size": 896,
 
3
  "Gemma3ForConditionalGeneration"
4
  ],
5
  "boi_token_index": 255999,
6
+ "bos_token_id": 2,
7
+ "dtype": "bfloat16",
8
  "eoi_token_index": 256000,
9
+ "eos_token_id": 106,
 
 
 
10
  "image_token_index": 262144,
11
  "initializer_range": 0.02,
12
  "mm_tokens_per_image": 256,
13
  "model_type": "gemma3",
14
+ "pad_token_id": 0,
15
  "quantization_config": {
16
  "config_groups": {
17
  "group_0": {
18
+ "format": "float-quantized",
19
  "input_activations": {
20
  "actorder": null,
21
  "block_structure": null,
 
47
  }
48
  },
49
  "format": "float-quantized",
50
+ "global_compression_ratio": null,
51
  "ignore": [
52
+ "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj",
53
+ "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj",
54
+ "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj",
55
+ "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj",
56
+ "model.vision_tower.vision_model.encoder.layers.0.mlp.fc1",
57
+ "model.vision_tower.vision_model.encoder.layers.0.mlp.fc2",
58
+ "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj",
59
+ "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj",
60
+ "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj",
61
+ "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj",
62
+ "model.vision_tower.vision_model.encoder.layers.1.mlp.fc1",
63
+ "model.vision_tower.vision_model.encoder.layers.1.mlp.fc2",
64
+ "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj",
65
+ "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj",
66
+ "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj",
67
+ "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj",
68
+ "model.vision_tower.vision_model.encoder.layers.2.mlp.fc1",
69
+ "model.vision_tower.vision_model.encoder.layers.2.mlp.fc2",
70
+ "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj",
71
+ "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj",
72
+ "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj",
73
+ "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj",
74
+ "model.vision_tower.vision_model.encoder.layers.3.mlp.fc1",
75
+ "model.vision_tower.vision_model.encoder.layers.3.mlp.fc2",
76
+ "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj",
77
+ "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj",
78
+ "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj",
79
+ "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj",
80
+ "model.vision_tower.vision_model.encoder.layers.4.mlp.fc1",
81
+ "model.vision_tower.vision_model.encoder.layers.4.mlp.fc2",
82
+ "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj",
83
+ "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj",
84
+ "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj",
85
+ "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj",
86
+ "model.vision_tower.vision_model.encoder.layers.5.mlp.fc1",
87
+ "model.vision_tower.vision_model.encoder.layers.5.mlp.fc2",
88
+ "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj",
89
+ "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj",
90
+ "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj",
91
+ "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj",
92
+ "model.vision_tower.vision_model.encoder.layers.6.mlp.fc1",
93
+ "model.vision_tower.vision_model.encoder.layers.6.mlp.fc2",
94
+ "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj",
95
+ "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj",
96
+ "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj",
97
+ "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj",
98
+ "model.vision_tower.vision_model.encoder.layers.7.mlp.fc1",
99
+ "model.vision_tower.vision_model.encoder.layers.7.mlp.fc2",
100
+ "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj",
101
+ "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj",
102
+ "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj",
103
+ "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj",
104
+ "model.vision_tower.vision_model.encoder.layers.8.mlp.fc1",
105
+ "model.vision_tower.vision_model.encoder.layers.8.mlp.fc2",
106
+ "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj",
107
+ "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj",
108
+ "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj",
109
+ "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj",
110
+ "model.vision_tower.vision_model.encoder.layers.9.mlp.fc1",
111
+ "model.vision_tower.vision_model.encoder.layers.9.mlp.fc2",
112
+ "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj",
113
+ "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj",
114
+ "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj",
115
+ "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj",
116
+ "model.vision_tower.vision_model.encoder.layers.10.mlp.fc1",
117
+ "model.vision_tower.vision_model.encoder.layers.10.mlp.fc2",
118
+ "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj",
119
+ "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj",
120
+ "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj",
121
+ "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj",
122
+ "model.vision_tower.vision_model.encoder.layers.11.mlp.fc1",
123
+ "model.vision_tower.vision_model.encoder.layers.11.mlp.fc2",
124
+ "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj",
125
+ "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj",
126
+ "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj",
127
+ "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj",
128
+ "model.vision_tower.vision_model.encoder.layers.12.mlp.fc1",
129
+ "model.vision_tower.vision_model.encoder.layers.12.mlp.fc2",
130
+ "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj",
131
+ "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj",
132
+ "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj",
133
+ "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj",
134
+ "model.vision_tower.vision_model.encoder.layers.13.mlp.fc1",
135
+ "model.vision_tower.vision_model.encoder.layers.13.mlp.fc2",
136
+ "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj",
137
+ "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj",
138
+ "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj",
139
+ "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj",
140
+ "model.vision_tower.vision_model.encoder.layers.14.mlp.fc1",
141
+ "model.vision_tower.vision_model.encoder.layers.14.mlp.fc2",
142
+ "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj",
143
+ "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj",
144
+ "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj",
145
+ "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj",
146
+ "model.vision_tower.vision_model.encoder.layers.15.mlp.fc1",
147
+ "model.vision_tower.vision_model.encoder.layers.15.mlp.fc2",
148
+ "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj",
149
+ "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj",
150
+ "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj",
151
+ "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj",
152
+ "model.vision_tower.vision_model.encoder.layers.16.mlp.fc1",
153
+ "model.vision_tower.vision_model.encoder.layers.16.mlp.fc2",
154
+ "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj",
155
+ "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj",
156
+ "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj",
157
+ "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj",
158
+ "model.vision_tower.vision_model.encoder.layers.17.mlp.fc1",
159
+ "model.vision_tower.vision_model.encoder.layers.17.mlp.fc2",
160
+ "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj",
161
+ "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj",
162
+ "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj",
163
+ "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj",
164
+ "model.vision_tower.vision_model.encoder.layers.18.mlp.fc1",
165
+ "model.vision_tower.vision_model.encoder.layers.18.mlp.fc2",
166
+ "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj",
167
+ "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj",
168
+ "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj",
169
+ "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj",
170
+ "model.vision_tower.vision_model.encoder.layers.19.mlp.fc1",
171
+ "model.vision_tower.vision_model.encoder.layers.19.mlp.fc2",
172
+ "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj",
173
+ "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj",
174
+ "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj",
175
+ "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj",
176
+ "model.vision_tower.vision_model.encoder.layers.20.mlp.fc1",
177
+ "model.vision_tower.vision_model.encoder.layers.20.mlp.fc2",
178
+ "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj",
179
+ "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj",
180
+ "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj",
181
+ "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj",
182
+ "model.vision_tower.vision_model.encoder.layers.21.mlp.fc1",
183
+ "model.vision_tower.vision_model.encoder.layers.21.mlp.fc2",
184
+ "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj",
185
+ "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj",
186
+ "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj",
187
+ "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj",
188
+ "model.vision_tower.vision_model.encoder.layers.22.mlp.fc1",
189
+ "model.vision_tower.vision_model.encoder.layers.22.mlp.fc2",
190
+ "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj",
191
+ "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj",
192
+ "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj",
193
+ "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj",
194
+ "model.vision_tower.vision_model.encoder.layers.23.mlp.fc1",
195
+ "model.vision_tower.vision_model.encoder.layers.23.mlp.fc2",
196
+ "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj",
197
+ "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj",
198
+ "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj",
199
+ "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj",
200
+ "model.vision_tower.vision_model.encoder.layers.24.mlp.fc1",
201
+ "model.vision_tower.vision_model.encoder.layers.24.mlp.fc2",
202
+ "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj",
203
+ "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj",
204
+ "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj",
205
+ "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj",
206
+ "model.vision_tower.vision_model.encoder.layers.25.mlp.fc1",
207
+ "model.vision_tower.vision_model.encoder.layers.25.mlp.fc2",
208
+ "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj",
209
+ "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj",
210
+ "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj",
211
+ "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj",
212
+ "model.vision_tower.vision_model.encoder.layers.26.mlp.fc1",
213
+ "model.vision_tower.vision_model.encoder.layers.26.mlp.fc2",
214
+ "lm_head"
215
  ],
216
  "kv_cache_scheme": null,
217
  "quant_method": "compressed-tensors",
218
+ "quantization_status": "compressed",
219
+ "sparsity_config": {},
220
+ "transform_config": {},
221
+ "version": "0.12.3.a20251114"
222
  },
223
  "text_config": {
224
+ "_sliding_window_pattern": 6,
225
  "attention_bias": false,
226
  "attention_dropout": 0.0,
227
  "attn_logit_softcapping": null,
228
  "cache_implementation": "hybrid",
229
+ "dtype": "bfloat16",
230
  "final_logit_softcapping": null,
231
  "head_dim": 256,
232
  "hidden_activation": "gelu_pytorch_tanh",
233
  "hidden_size": 2560,
234
  "initializer_range": 0.02,
235
  "intermediate_size": 10240,
236
+ "layer_types": [
237
+ "sliding_attention",
238
+ "sliding_attention",
239
+ "sliding_attention",
240
+ "sliding_attention",
241
+ "sliding_attention",
242
+ "full_attention",
243
+ "sliding_attention",
244
+ "sliding_attention",
245
+ "sliding_attention",
246
+ "sliding_attention",
247
+ "sliding_attention",
248
+ "full_attention",
249
+ "sliding_attention",
250
+ "sliding_attention",
251
+ "sliding_attention",
252
+ "sliding_attention",
253
+ "sliding_attention",
254
+ "full_attention",
255
+ "sliding_attention",
256
+ "sliding_attention",
257
+ "sliding_attention",
258
+ "sliding_attention",
259
+ "sliding_attention",
260
+ "full_attention",
261
+ "sliding_attention",
262
+ "sliding_attention",
263
+ "sliding_attention",
264
+ "sliding_attention",
265
+ "sliding_attention",
266
+ "full_attention",
267
+ "sliding_attention",
268
+ "sliding_attention",
269
+ "sliding_attention",
270
+ "sliding_attention"
271
+ ],
272
  "max_position_embeddings": 131072,
273
  "model_type": "gemma3_text",
274
  "num_attention_heads": 8,
 
284
  "rope_theta": 1000000.0,
285
  "sliding_window": 1024,
286
  "sliding_window_pattern": 6,
287
+ "use_bidirectional_attention": false,
288
  "use_cache": true,
289
  "vocab_size": 262208
290
  },
291
+ "transformers_version": "4.57.1",
292
+ "unsloth_fixed": true,
293
  "vision_config": {
294
  "attention_dropout": 0.0,
295
+ "dtype": "bfloat16",
296
  "hidden_act": "gelu_pytorch_tanh",
297
  "hidden_size": 1152,
298
  "image_size": 896,