tbhot3ww committed on
Commit
b3fa50b
·
verified ·
1 Parent(s): 9f5e7a0

NVFP4 export (modelopt_fp4) for vLLM

Browse files
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MistralForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 1,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 2,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 12288,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 28672,
14
+ "max_position_embeddings": 131072,
15
+ "model_type": "mistral",
16
+ "num_attention_heads": 96,
17
+ "num_hidden_layers": 88,
18
+ "num_key_value_heads": 8,
19
+ "rms_norm_eps": 1e-05,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": null,
22
+ "tie_word_embeddings": false,
23
+ "transformers_version": "4.57.1",
24
+ "use_cache": true,
25
+ "vocab_size": 32768,
26
+ "quantization_config": {
27
+ "config_groups": {
28
+ "group_0": {
29
+ "input_activations": {
30
+ "dynamic": false,
31
+ "num_bits": 4,
32
+ "type": "float",
33
+ "group_size": 16
34
+ },
35
+ "weights": {
36
+ "dynamic": false,
37
+ "num_bits": 4,
38
+ "type": "float",
39
+ "group_size": 16
40
+ },
41
+ "targets": [
42
+ "Linear"
43
+ ]
44
+ }
45
+ },
46
+ "ignore": [
47
+ "lm_head"
48
+ ],
49
+ "quant_algo": "NVFP4",
50
+ "kv_cache_scheme": {
51
+ "dynamic": false,
52
+ "num_bits": 8,
53
+ "type": "float"
54
+ },
55
+ "producer": {
56
+ "name": "modelopt",
57
+ "version": "0.35.0"
58
+ },
59
+ "quant_method": "modelopt"
60
+ }
61
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.57.1"
6
+ }
hf_quant_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "producer": {
3
+ "name": "modelopt",
4
+ "version": "0.35.0"
5
+ },
6
+ "quantization": {
7
+ "quant_algo": "NVFP4",
8
+ "kv_cache_quant_algo": "FP8",
9
+ "group_size": 16,
10
+ "exclude_modules": [
11
+ "lm_head"
12
+ ]
13
+ }
14
+ }
model-00001-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bb4db4a659a13f561b29a8fd9e1ba796d11f0eb97182d871722eeca28956f5c
3
+ size 4882435272
model-00002-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f53bb89f59c6de156c2111a17961d02d44b8ac58bd1e521ab13c383bc9bf9fa8
3
+ size 4869903280
model-00003-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aea54e71963c7bbca9bf0102b6cadae70209d4c32105c10d492bab66819a6234
3
+ size 4869903416
model-00004-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed7a10b8344bd2aed9f8c808e02bf04b985a5dece3bbe8ee7dbe438be2d876e2
3
+ size 4969044576
model-00005-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3561d2f23212232601ea5846cf88d79e0cc208e4fa186e4df3e9d635acdf6772
3
+ size 4954838728
model-00006-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:944cff5166267dfe2f09119dbf4bad321354b702cffd3bfbef8ea16ca71be1f4
3
+ size 4869903416
model-00007-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8c6ac3f8b8592daab5bf317793efbc8c6bebb419e2a5359f289d782fbe0b4b
3
+ size 4969044576
model-00008-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2f77261dcb902a3ed164356a3d2da59966e61ae32fb0081a7d30eef91196dc6
3
+ size 4954838728
model-00009-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c99d7a8ef3367ebadcef1f31ccc8690ea3351b6708cc37ecc6922039b6a8dadd
3
+ size 4869903416
model-00010-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1906cbd8bdefe455dbed519812bed9af2d63804faad1e139b2fef884195ce186
3
+ size 4969044576
model-00011-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ca23aa8fff52e0b676752f6dab2c6f8c0f63ff205d939e68119a28c74d6ee4
3
+ size 4954838728
model-00012-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4457a83c8e71a6c301f66935b199cbc2c82ed77cb9bfe2b1146823663889c61
3
+ size 4869903416
model-00013-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb087b04055062be77e8fc487a764564085c1691fe19f3631c1393e493c110d3
3
+ size 4969044576
model-00014-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af77e4da93d82c6b3cfe7cd882ab29870c111ec561ae17a5fe769ce130814e10
3
+ size 4954838728
model-00015-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51f638ce75246b5a0a3299ceb19924f2abff5b1778990c1d248f3ba0b1c4f96
3
+ size 1201743136
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b968b8dc352f42192367337c78ccc61e1eaddc6d641a579372d4f20694beb7a
3
+ size 587562
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff