Upload folder using huggingface_hub
Browse files
- .gitattributes +1 -0
- SYSTEM_PROMPT.txt +3 -0
- config.json +61 -0
- generation_config.json +11 -0
- model-00001-of-00004.safetensors +3 -0
- model-00002-of-00004.safetensors +3 -0
- model-00003-of-00004.safetensors +3 -0
- model-00004-of-00004.safetensors +3 -0
- model.safetensors.index.json +0 -0
- quantization_config.json +15 -0
- tekken.json +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tekken.json filter=lfs diff=lfs merge=lfs -text
|
SYSTEM_PROMPT.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.
|
| 2 |
+
|
| 3 |
+
Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response. Use the same language as the input.[/THINK]Here, provide a self-contained response.
|
config.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Mistral3ForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"dtype": "bfloat16",
|
| 6 |
+
"image_token_index": 10,
|
| 7 |
+
"model_type": "mistral3",
|
| 8 |
+
"multimodal_projector_bias": false,
|
| 9 |
+
"projector_hidden_act": "gelu",
|
| 10 |
+
"quantization_config": {
|
| 11 |
+
"autoround_version": "0.9.0",
|
| 12 |
+
"batch_size": 1,
|
| 13 |
+
"bits": 4,
|
| 14 |
+
"block_name_to_quantize": "model.language_model.layers",
|
| 15 |
+
"data_type": "int",
|
| 16 |
+
"dataset": "liuhaotian/llava_conv_58k",
|
| 17 |
+
"gradient_accumulate_steps": 8,
|
| 18 |
+
"group_size": 128,
|
| 19 |
+
"packing_format": "auto_round:auto_gptq",
|
| 20 |
+
"quant_method": "auto-round",
|
| 21 |
+
"scale_dtype": "None",
|
| 22 |
+
"seqlen": 512,
|
| 23 |
+
"sym": true
|
| 24 |
+
},
|
| 25 |
+
"spatial_merge_size": 2,
|
| 26 |
+
"text_config": {
|
| 27 |
+
"attention_dropout": 0.0,
|
| 28 |
+
"head_dim": 128,
|
| 29 |
+
"hidden_act": "silu",
|
| 30 |
+
"hidden_size": 5120,
|
| 31 |
+
"initializer_range": 0.02,
|
| 32 |
+
"intermediate_size": 32768,
|
| 33 |
+
"max_position_embeddings": 131072,
|
| 34 |
+
"model_type": "mistral",
|
| 35 |
+
"num_attention_heads": 32,
|
| 36 |
+
"num_hidden_layers": 40,
|
| 37 |
+
"num_key_value_heads": 8,
|
| 38 |
+
"rms_norm_eps": 1e-05,
|
| 39 |
+
"rope_theta": 1000000000.0,
|
| 40 |
+
"sliding_window": null,
|
| 41 |
+
"use_cache": true,
|
| 42 |
+
"vocab_size": 131072
|
| 43 |
+
},
|
| 44 |
+
"transformers_version": "4.57.1",
|
| 45 |
+
"vision_config": {
|
| 46 |
+
"attention_dropout": 0.0,
|
| 47 |
+
"head_dim": 64,
|
| 48 |
+
"hidden_act": "silu",
|
| 49 |
+
"hidden_size": 1024,
|
| 50 |
+
"image_size": 1540,
|
| 51 |
+
"initializer_range": 0.02,
|
| 52 |
+
"intermediate_size": 4096,
|
| 53 |
+
"model_type": "pixtral",
|
| 54 |
+
"num_attention_heads": 16,
|
| 55 |
+
"num_channels": 3,
|
| 56 |
+
"num_hidden_layers": 24,
|
| 57 |
+
"patch_size": 14,
|
| 58 |
+
"rope_theta": 10000.0
|
| 59 |
+
},
|
| 60 |
+
"vision_feature_layer": -1
|
| 61 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"do_sample": true,
|
| 5 |
+
"eos_token_id": 2,
|
| 6 |
+
"max_new_tokens": 131072,
|
| 7 |
+
"pad_token_id": 11,
|
| 8 |
+
"temperature": 0.7,
|
| 9 |
+
"top_p": 0.95,
|
| 10 |
+
"transformers_version": "4.57.1"
|
| 11 |
+
}
|
model-00001-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e0dd486b800237cb5594befaf1f12b22c70b7dff93def699b96bb124e596bc9
|
| 3 |
+
size 4933281064
|
model-00002-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6201b9c257a0f1b385437bb082f4dae95813a686a6a75db6e907ff4cdee71638
|
| 3 |
+
size 4995921624
|
model-00003-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61ee99fa3b823359bce563a6a32baf17d8c57097d30e8f8843889e6218028b80
|
| 3 |
+
size 3840950744
|
model-00004-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:487158e25eb90313f1f57a1894a260addf66dd6526984393fe8150abbe3c7a22
|
| 3 |
+
size 1342177424
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
quantization_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bits": 4,
|
| 3 |
+
"group_size": 128,
|
| 4 |
+
"sym": true,
|
| 5 |
+
"data_type": "int",
|
| 6 |
+
"seqlen": 512,
|
| 7 |
+
"batch_size": 1,
|
| 8 |
+
"scale_dtype": "None",
|
| 9 |
+
"gradient_accumulate_steps": 8,
|
| 10 |
+
"dataset": "liuhaotian/llava_conv_58k",
|
| 11 |
+
"autoround_version": "0.9.0",
|
| 12 |
+
"block_name_to_quantize": "model.language_model.layers",
|
| 13 |
+
"quant_method": "auto-round",
|
| 14 |
+
"packing_format": "auto_round:auto_gptq"
|
| 15 |
+
}
|
tekken.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05e051a32ae9d6333459da0083d36507116334fc85378321cfb32dfff7187531
|
| 3 |
+
size 19399765
|