zyusc
/

mamba-test

zyusc committed on Nov 9, 2024

Commit

2d56f53

verified ·

1 Parent(s): 7491223

Upload tokenizer_config.json

Files changed (1) hide show

tokenizer_config.json CHANGED Viewed

@@ -1,51 +1,7 @@
 {
-  "add_bos_token": true,
-  "add_eos_token": false,
-  "add_prefix_space": null,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ],
-  "bos_token": "<|im_start|>",
-  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "legacy": true,
-  "max_length": 4096,
-  "model_max_length": 4096,
-  "pad_token": "[PAD]",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "stride": 0,
-  "tokenizer_class": "LlamaTokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
-  "unk_token": null,
-  "use_default_system_prompt": false
 }

 {
+  "tokenizer_class": "GPT2Tokenizer",
+  "vocab_size": 50257,
+  "padding_side": "right",
+  "special_tokens_map_file": null,
+  "model_max_length": 1024
 }