n1ck-guo committed on
Commit 0b0024b · verified · 1 Parent(s): 8db3a11

Upload folder using huggingface_hub

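The commit message says the folder was pushed with huggingface_hub. A minimal sketch of such an upload; the local path and repo id below are placeholders, not values taken from this commit:

```python
from huggingface_hub import HfApi

# Push every file in the local folder as one commit. Large binaries such as
# the safetensors shards below end up in Git LFS storage on the Hub.
api = HfApi()
api.upload_folder(
    folder_path="./quantized_model",  # placeholder local path
    repo_id="user/repo",              # placeholder repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```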
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tekken.json filter=lfs diff=lfs merge=lfs -text
SYSTEM_PROMPT.txt ADDED
@@ -0,0 +1,3 @@
+ First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.
+
+ Your thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response. Use the same language as the input.[/THINK]Here, provide a self-contained response.
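SYSTEM_PROMPT.txt carries the model's [THINK]...[/THINK] reasoning template. A minimal sketch of feeding it as the system message through the processor's chat template; the repo id and user question are placeholders, and the typed-content message format is assumed from transformers' multimodal-processor conventions:

```python
from transformers import AutoProcessor

repo_id = "user/repo"  # placeholder for this repository's id
system_prompt = open("SYSTEM_PROMPT.txt").read()

processor = AutoProcessor.from_pretrained(repo_id)
messages = [
    {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
    {"role": "user", "content": [{"type": "text", "text": "Solve 12 * 17 step by step."}]},
]
# Renders the conversation with the model's chat template; the reply is
# expected to wrap its scratch work in [THINK]...[/THINK] per the prompt.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
```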
config.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "architectures": [
+     "Mistral3ForConditionalGeneration"
+   ],
+   "dtype": "bfloat16",
+   "image_token_index": 10,
+   "model_type": "mistral3",
+   "multimodal_projector_bias": false,
+   "projector_hidden_act": "gelu",
+   "quantization_config": {
+     "autoround_version": "0.9.0",
+     "batch_size": 1,
+     "bits": 4,
+     "block_name_to_quantize": "model.language_model.layers",
+     "data_type": "int",
+     "dataset": "liuhaotian/llava_conv_58k",
+     "gradient_accumulate_steps": 8,
+     "group_size": 128,
+     "packing_format": "auto_round:auto_gptq",
+     "quant_method": "auto-round",
+     "scale_dtype": "None",
+     "seqlen": 512,
+     "sym": true
+   },
+   "spatial_merge_size": 2,
+   "text_config": {
+     "attention_dropout": 0.0,
+     "head_dim": 128,
+     "hidden_act": "silu",
+     "hidden_size": 5120,
+     "initializer_range": 0.02,
+     "intermediate_size": 32768,
+     "max_position_embeddings": 131072,
+     "model_type": "mistral",
+     "num_attention_heads": 32,
+     "num_hidden_layers": 40,
+     "num_key_value_heads": 8,
+     "rms_norm_eps": 1e-05,
+     "rope_theta": 1000000000.0,
+     "sliding_window": null,
+     "use_cache": true,
+     "vocab_size": 131072
+   },
+   "transformers_version": "4.57.1",
+   "vision_config": {
+     "attention_dropout": 0.0,
+     "head_dim": 64,
+     "hidden_act": "silu",
+     "hidden_size": 1024,
+     "image_size": 1540,
+     "initializer_range": 0.02,
+     "intermediate_size": 4096,
+     "model_type": "pixtral",
+     "num_attention_heads": 16,
+     "num_channels": 3,
+     "num_hidden_layers": 24,
+     "patch_size": 14,
+     "rope_theta": 10000.0
+   },
+   "vision_feature_layer": -1
+ }
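The embedded quantization_config marks this as a 4-bit AutoRound checkpoint packed in the auto_round:auto_gptq format, so transformers can load it directly. A minimal loading sketch; the repo id is a placeholder, and the auto-round package is assumed to be installed so the quantized linear layers resolve:

```python
from transformers import AutoModelForImageTextToText, AutoProcessor

repo_id = "user/repo"  # placeholder for this repository's id

# "auto" picks up the bfloat16 dtype declared in config.json; the
# quantization_config routes the language-model layers through auto-round.
model = AutoModelForImageTextToText.from_pretrained(
    repo_id,
    torch_dtype="auto",
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(repo_id)
```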
generation_config.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "do_sample": true,
+   "eos_token_id": 2,
+   "max_new_tokens": 131072,
+   "pad_token_id": 11,
+   "temperature": 0.7,
+   "top_p": 0.95,
+   "transformers_version": "4.57.1"
+ }
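generation_config.json sets the repo's sampling defaults (temperature 0.7, top_p 0.95, sampling on). A minimal sketch of loading them explicitly; the repo id is a placeholder, and the model/inputs in the commented line are assumed to exist:

```python
from transformers import GenerationConfig

repo_id = "user/repo"  # placeholder for this repository's id
gen_config = GenerationConfig.from_pretrained(repo_id)

# These values come straight from generation_config.json above.
assert gen_config.temperature == 0.7 and gen_config.top_p == 0.95

# Passed explicitly, the same defaults govern decoding:
# outputs = model.generate(**inputs, generation_config=gen_config)
```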
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e0dd486b800237cb5594befaf1f12b22c70b7dff93def699b96bb124e596bc9
+ size 4933281064
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6201b9c257a0f1b385437bb082f4dae95813a686a6a75db6e907ff4cdee71638
+ size 4995921624
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61ee99fa3b823359bce563a6a32baf17d8c57097d30e8f8843889e6218028b80
+ size 3840950744
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:487158e25eb90313f1f57a1894a260addf66dd6526984393fe8150abbe3c7a22
+ size 1342177424
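Each shard entry above is a Git LFS pointer: the repo stores only a sha256 oid and byte size, while the actual weights live in LFS storage. A minimal stdlib sketch of verifying a downloaded shard against its pointer, using the oid for shard 1 of 4 from this commit:

```python
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    """Stream the file so multi-GB shards never need to fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

# oid copied from the LFS pointer for model-00001-of-00004.safetensors above.
assert sha256_of("model-00001-of-00004.safetensors") == (
    "2e0dd486b800237cb5594befaf1f12b22c70b7dff93def699b96bb124e596bc9"
)
```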
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quantization_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bits": 4,
+   "group_size": 128,
+   "sym": true,
+   "data_type": "int",
+   "seqlen": 512,
+   "batch_size": 1,
+   "scale_dtype": "None",
+   "gradient_accumulate_steps": 8,
+   "dataset": "liuhaotian/llava_conv_58k",
+   "autoround_version": "0.9.0",
+   "block_name_to_quantize": "model.language_model.layers",
+   "quant_method": "auto-round",
+   "packing_format": "auto_round:auto_gptq"
+ }
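quantization_config.json mirrors the recipe embedded in config.json: symmetric int4 with group size 128 over model.language_model.layers, calibrated on liuhaotian/llava_conv_58k at seqlen 512, batch size 1, and 8 gradient-accumulation steps. A rough sketch of reproducing that recipe with auto-round 0.9.0; the constructor and export call shapes are assumptions from the library's documented usage, not taken from this commit, and model/tokenizer loading is elided:

```python
from auto_round import AutoRound

# Hyperparameters copied from quantization_config.json; `model` and
# `tokenizer` are assumed to be loaded already. API shape is an assumption.
autoround = AutoRound(
    model, tokenizer,
    bits=4, group_size=128, sym=True,
    seqlen=512, batch_size=1, gradient_accumulate_steps=8,
    dataset="liuhaotian/llava_conv_58k",
)
autoround.quantize_and_save("./quantized_model", format="auto_round:auto_gptq")
```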
tekken.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05e051a32ae9d6333459da0083d36507116334fc85378321cfb32dfff7187531
+ size 19399765
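tekken.json is Mistral's Tekken tokenizer definition, stored via LFS per the .gitattributes rule added above. A minimal sketch of loading it with the mistral-common package; the from_file call and encode signature are assumptions from that library's usage, and transformers' AutoTokenizer for this repo would wrap the same file:

```python
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

# Load the Tekken tokenizer directly from the file uploaded in this commit,
# then encode through the underlying Tekkenizer (assumed attribute chain).
tokenizer = MistralTokenizer.from_file("tekken.json")
tokens = tokenizer.instruct_tokenizer.tokenizer.encode("hello", bos=True, eos=False)
```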