danielhanchen committed on
Commit
205b67d
·
verified ·
1 Parent(s): 7915eda

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +8 -49
config.json CHANGED
@@ -3,19 +3,18 @@
3
  "Gemma3ForConditionalGeneration"
4
  ],
5
  "boi_token_index": 255999,
6
- "bos_token_id": 2,
7
- "dtype": "bfloat16",
8
  "eoi_token_index": 256000,
9
- "eos_token_id": 106,
 
 
 
10
  "image_token_index": 262144,
11
  "initializer_range": 0.02,
12
  "mm_tokens_per_image": 256,
13
  "model_type": "gemma3",
14
- "pad_token_id": 0,
15
  "quantization_config": {
16
  "config_groups": {
17
  "group_0": {
18
- "format": "float-quantized",
19
  "input_activations": {
20
  "actorder": null,
21
  "block_structure": null,
@@ -47,7 +46,7 @@
47
  }
48
  },
49
  "format": "float-quantized",
50
- "global_compression_ratio": null,
51
  "ignore": [
52
  "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj",
53
  "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj",
@@ -215,57 +214,19 @@
215
  ],
216
  "kv_cache_scheme": null,
217
  "quant_method": "compressed-tensors",
218
- "quantization_status": "compressed",
219
  },
220
  "text_config": {
221
- "_sliding_window_pattern": 6,
222
  "attention_bias": false,
223
  "attention_dropout": 0.0,
224
  "attn_logit_softcapping": null,
225
  "cache_implementation": "hybrid",
226
- "dtype": "bfloat16",
227
  "final_logit_softcapping": null,
228
  "head_dim": 256,
229
  "hidden_activation": "gelu_pytorch_tanh",
230
  "hidden_size": 2560,
231
  "initializer_range": 0.02,
232
  "intermediate_size": 10240,
233
- "layer_types": [
234
- "sliding_attention",
235
- "sliding_attention",
236
- "sliding_attention",
237
- "sliding_attention",
238
- "sliding_attention",
239
- "full_attention",
240
- "sliding_attention",
241
- "sliding_attention",
242
- "sliding_attention",
243
- "sliding_attention",
244
- "sliding_attention",
245
- "full_attention",
246
- "sliding_attention",
247
- "sliding_attention",
248
- "sliding_attention",
249
- "sliding_attention",
250
- "sliding_attention",
251
- "full_attention",
252
- "sliding_attention",
253
- "sliding_attention",
254
- "sliding_attention",
255
- "sliding_attention",
256
- "sliding_attention",
257
- "full_attention",
258
- "sliding_attention",
259
- "sliding_attention",
260
- "sliding_attention",
261
- "sliding_attention",
262
- "sliding_attention",
263
- "full_attention",
264
- "sliding_attention",
265
- "sliding_attention",
266
- "sliding_attention",
267
- "sliding_attention"
268
- ],
269
  "max_position_embeddings": 131072,
270
  "model_type": "gemma3_text",
271
  "num_attention_heads": 8,
@@ -281,15 +242,13 @@
281
  "rope_theta": 1000000.0,
282
  "sliding_window": 1024,
283
  "sliding_window_pattern": 6,
284
- "use_bidirectional_attention": false,
285
  "use_cache": true,
286
  "vocab_size": 262208
287
  },
288
- "transformers_version": "4.57.1",
289
- "unsloth_fixed": true,
290
  "vision_config": {
291
  "attention_dropout": 0.0,
292
- "dtype": "bfloat16",
293
  "hidden_act": "gelu_pytorch_tanh",
294
  "hidden_size": 1152,
295
  "image_size": 896,
 
3
  "Gemma3ForConditionalGeneration"
4
  ],
5
  "boi_token_index": 255999,
 
 
6
  "eoi_token_index": 256000,
7
+ "eos_token_id": [
8
+ 1,
9
+ 106
10
+ ],
11
  "image_token_index": 262144,
12
  "initializer_range": 0.02,
13
  "mm_tokens_per_image": 256,
14
  "model_type": "gemma3",
 
15
  "quantization_config": {
16
  "config_groups": {
17
  "group_0": {
 
18
  "input_activations": {
19
  "actorder": null,
20
  "block_structure": null,
 
46
  }
47
  },
48
  "format": "float-quantized",
49
+ "global_compression_ratio": 1.1950546068196743,
50
  "ignore": [
51
  "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj",
52
  "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj",
 
214
  ],
215
  "kv_cache_scheme": null,
216
  "quant_method": "compressed-tensors",
217
+ "quantization_status": "compressed"
218
  },
219
  "text_config": {
 
220
  "attention_bias": false,
221
  "attention_dropout": 0.0,
222
  "attn_logit_softcapping": null,
223
  "cache_implementation": "hybrid",
 
224
  "final_logit_softcapping": null,
225
  "head_dim": 256,
226
  "hidden_activation": "gelu_pytorch_tanh",
227
  "hidden_size": 2560,
228
  "initializer_range": 0.02,
229
  "intermediate_size": 10240,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  "max_position_embeddings": 131072,
231
  "model_type": "gemma3_text",
232
  "num_attention_heads": 8,
 
242
  "rope_theta": 1000000.0,
243
  "sliding_window": 1024,
244
  "sliding_window_pattern": 6,
 
245
  "use_cache": true,
246
  "vocab_size": 262208
247
  },
248
+ "torch_dtype": "bfloat16",
249
+ "transformers_version": "4.50.0",
250
  "vision_config": {
251
  "attention_dropout": 0.0,
 
252
  "hidden_act": "gelu_pytorch_tanh",
253
  "hidden_size": 1152,
254
  "image_size": 896,