Add Dia2-2B inference bundle

Browse files

Files changed (9) hide show

README.md +30 -3
added_tokens.json +54 -0
config.json +120 -0
merges.txt +0 -0
model.safetensors +3 -0
special_tokens_map.json +389 -0
tokenizer.json +0 -0
tokenizer_config.json +619 -0
vocab.json +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,30 @@
----
-license: apache-2.0
----

+# Dia2 2B (new_dia)
+This repository holds the inference assets for the Dia2 2B text-to-speech model:
+- `config.json`: minimal runtime config consumed by `new_dia.config.load_config`.
+- `model.safetensors`: weights stored in FP32, in the bias-free linear layout (the examples below run them in `bfloat16` via the dtype option).
+- Tokenizer bundle (`tokenizer.json`, `tokenizer_config.json`, `special_tokens_map.json`, `vocab.json`, `merges.txt`, `added_tokens.json`).
+## Usage
+```bash
+pip install -U torch transformers safetensors huggingface_hub
+uv run -m new_dia.cli \
+  --config nari-labs/Dia2-2B --weights nari-labs/Dia2-2B \
+  --out output.wav --cfg 2.0 --temperature 0.8 --dtype bfloat16
+```
+Or load the generator from Python:
+```python
+from new_dia.runtime.generator import TextToSpeechGenerator
+runtime = TextToSpeechGenerator.from_paths(
+    config_path="nari-labs/Dia2-2B",
+    weights_path="nari-labs/Dia2-2B",
+    device="cuda",
+    dtype="bfloat16",
+)
+```
+Mimi codec weights are not included in this repo; they are fetched from `kyutai/mimi` at runtime, so the first run needs network access.

added_tokens.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "(applause)": 49163,
+  "(audience applauds)": 49202,
+  "(audience cheers)": 49203,
+  "(audience laughs)": 49201,
+  "(barks)": 49173,
+  "(beatboxing)": 49181,
+  "(beep)": 49188,
+  "(burps)": 49168,
+  "(buzzer)": 49189,
+  "(car engine sound)": 49200,
+  "(cheers)": 49174,
+  "(claps)": 49164,
+  "(clears throat)": 49156,
+  "(clicks)": 49184,
+  "(coughs)": 49155,
+  "(ding)": 49190,
+  "(explosion)": 49196,
+  "(gasps)": 49158,
+  "(groans)": 49161,
+  "(growls)": 49172,
+  "(grunts)": 49166,
+  "(gulps)": 49187,
+  "(gunfire)": 49195,
+  "(horn honks)": 49199,
+  "(hums)": 49167,
+  "(knocks)": 49185,
+  "(laughs)": 49154,
+  "(mumbles)": 49160,
+  "(panting)": 49186,
+  "(phone ringing)": 49197,
+  "(screams)": 49165,
+  "(sighs)": 49157,
+  "(sings)": 49159,
+  "(sips)": 49175,
+  "(siren)": 49198,
+  "(sizzling)": 49192,
+  "(slurps)": 49178,
+  "(snaps)": 49170,
+  "(sneezes)": 49182,
+  "(sniffs)": 49162,
+  "(snores)": 49176,
+  "(snorts)": 49179,
+  "(sobs)": 49180,
+  "(squeaks)": 49171,
+  "(thuds)": 49183,
+  "(thunder)": 49194,
+  "(tires screeching)": 49193,
+  "(whispers)": 49177,
+  "(whistles)": 49169,
+  "(whoosh)": 49191,
+  "[S1]": 49152,
+  "[S2]": 49153
+}

config.json ADDED Viewed

	@@ -0,0 +1,120 @@

+{
+  "version": "2B",
+  "data": {
+    "channels": 34,
+    "text_vocab_size": 49280,
+    "audio_vocab_size": 2050,
+    "action_vocab_size": 2,
+    "text_pad_token_id": 3,
+    "text_new_word_token_id": 2,
+    "text_zero_token_id": 7,
+    "audio_pad_token_id": 2049,
+    "audio_bos_token_id": 2048,
+    "action_pad_token_id": 0,
+    "action_new_word_token_id": 1,
+    "delay_pattern": [
+      16,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18,
+      18
+    ],
+    "first_word_min_start": 3,
+    "max_pad": 8,
+    "second_stream_ahead": 2
+  },
+  "model": {
+    "decoder": {
+      "n_layer": 28,
+      "n_embd": 2048,
+      "n_hidden": 6144,
+      "gqa_query_heads": 16,
+      "kv_heads": 8,
+      "gqa_head_dim": 128
+    },
+    "depformer": {
+      "n_layer": 4,
+      "n_embd": 1024,
+      "n_hidden": 3072,
+      "gqa_query_heads": 8,
+      "kv_heads": 8,
+      "gqa_head_dim": 128,
+      "apply_rope": true,
+      "text_embedding": false
+    },
+    "linear": {
+      "mlp_activations": [
+        "silu",
+        "linear"
+      ]
+    },
+    "dropout": 0.0,
+    "rope_min_timescale": 1,
+    "rope_max_timescale": 10000.0,
+    "normalization_layer_epsilon": 1e-06
+  },
+  "runtime": {
+    "compute_dtype": "bfloat16",
+    "logits_dtype": "float32",
+    "weights_schedule": [
+      0,
+      0,
+      1,
+      1,
+      1,
+      1,
+      2,
+      2,
+      2,
+      2,
+      2,
+      2,
+      2,
+      2,
+      3,
+      3,
+      3,
+      3,
+      3,
+      3,
+      3,
+      3,
+      4,
+      4,
+      4,
+      4,
+      4,
+      4,
+      4,
+      4,
+      4
+    ]
+  }
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3eb6b4758beaae97dcfa87d430706ede0d78306cc3afb1eaf4397f216abd1db
+size 7678277416

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,389 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "[S1]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[S2]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(laughs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(coughs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(clears throat)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(sighs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(gasps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(sings)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(mumbles)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(groans)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(sniffs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(applause)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(claps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(screams)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(grunts)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(hums)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(burps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(whistles)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(snaps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(squeaks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(growls)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(barks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(cheers)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(sips)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(snores)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(whispers)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(slurps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(snorts)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(sobs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(beatboxing)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(sneezes)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(thuds)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(clicks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(knocks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(panting)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(gulps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(beep)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(buzzer)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(ding)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(whoosh)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(sizzling)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(tires screeching)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(thunder)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(gunfire)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(explosion)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(phone ringing)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(siren)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(horn honks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(car engine sound)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(audience laughs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(audience applauds)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "(audience cheers)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,619 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<repo_name>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<file_sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<jupyter_script>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49152": {
+      "content": "[S1]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49153": {
+      "content": "[S2]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49154": {
+      "content": "(laughs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49155": {
+      "content": "(coughs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49156": {
+      "content": "(clears throat)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49157": {
+      "content": "(sighs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49158": {
+      "content": "(gasps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49159": {
+      "content": "(sings)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49160": {
+      "content": "(mumbles)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49161": {
+      "content": "(groans)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49162": {
+      "content": "(sniffs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49163": {
+      "content": "(applause)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49164": {
+      "content": "(claps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49165": {
+      "content": "(screams)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49166": {
+      "content": "(grunts)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49167": {
+      "content": "(hums)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49168": {
+      "content": "(burps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49169": {
+      "content": "(whistles)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49170": {
+      "content": "(snaps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49171": {
+      "content": "(squeaks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49172": {
+      "content": "(growls)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49173": {
+      "content": "(barks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49174": {
+      "content": "(cheers)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49175": {
+      "content": "(sips)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49176": {
+      "content": "(snores)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49177": {
+      "content": "(whispers)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49178": {
+      "content": "(slurps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49179": {
+      "content": "(snorts)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49180": {
+      "content": "(sobs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49181": {
+      "content": "(beatboxing)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49182": {
+      "content": "(sneezes)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49183": {
+      "content": "(thuds)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49184": {
+      "content": "(clicks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49185": {
+      "content": "(knocks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49186": {
+      "content": "(panting)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49187": {
+      "content": "(gulps)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49188": {
+      "content": "(beep)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49189": {
+      "content": "(buzzer)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49190": {
+      "content": "(ding)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49191": {
+      "content": "(whoosh)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49192": {
+      "content": "(sizzling)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49193": {
+      "content": "(tires screeching)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49194": {
+      "content": "(thunder)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49195": {
+      "content": "(gunfire)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49196": {
+      "content": "(explosion)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49197": {
+      "content": "(phone ringing)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49198": {
+      "content": "(siren)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49199": {
+      "content": "(horn honks)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49200": {
+      "content": "(car engine sound)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49201": {
+      "content": "(audience laughs)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49202": {
+      "content": "(audience applauds)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49203": {
+      "content": "(audience cheers)",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "[S1]",
+    "[S2]",
+    "(laughs)",
+    "(coughs)",
+    "(clears throat)",
+    "(sighs)",
+    "(gasps)",
+    "(sings)",
+    "(mumbles)",
+    "(groans)",
+    "(sniffs)",
+    "(applause)",
+    "(claps)",
+    "(screams)",
+    "(grunts)",
+    "(hums)",
+    "(burps)",
+    "(whistles)",
+    "(snaps)",
+    "(squeaks)",
+    "(growls)",
+    "(barks)",
+    "(cheers)",
+    "(sips)",
+    "(snores)",
+    "(whispers)",
+    "(slurps)",
+    "(snorts)",
+    "(sobs)",
+    "(beatboxing)",
+    "(sneezes)",
+    "(thuds)",
+    "(clicks)",
+    "(knocks)",
+    "(panting)",
+    "(gulps)",
+    "(beep)",
+    "(buzzer)",
+    "(ding)",
+    "(whoosh)",
+    "(sizzling)",
+    "(tires screeching)",
+    "(thunder)",
+    "(gunfire)",
+    "(explosion)",
+    "(phone ringing)",
+    "(siren)",
+    "(horn honks)",
+    "(car engine sound)",
+    "(audience laughs)",
+    "(audience applauds)",
+    "(audience cheers)"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 8192,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "vocab_size": 49204
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff