Dia2-2B / config.json
NariLabs's picture
Trim config to runtime essentials
6a8c5a0 verified
raw
history blame
1.44 kB
{
"data": {
"channels": 34,
"text_vocab_size": 49280,
"audio_vocab_size": 2050,
"action_vocab_size": 2,
"text_pad_token_id": 3,
"text_new_word_token_id": 2,
"text_zero_token_id": 7,
"audio_pad_token_id": 2049,
"audio_bos_token_id": 2048,
"action_pad_token_id": 0,
"action_new_word_token_id": 1,
"delay_pattern": [
16, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18
],
"first_word_min_start": 3,
"max_pad": 8,
"second_stream_ahead": 2
},
"model": {
"decoder": {
"n_layer": 28,
"n_embd": 2048,
"n_hidden": 6144,
"gqa_query_heads": 16,
"kv_heads": 8,
"gqa_head_dim": 128
},
"depformer": {
"n_layer": 4,
"n_embd": 1024,
"n_hidden": 3072,
"gqa_query_heads": 8,
"kv_heads": 8,
"gqa_head_dim": 128,
"apply_rope": true,
"text_embedding": false
},
"linear": {
"mlp_activations": [
"silu",
"linear"
]
},
"dropout": 0.0,
"rope_min_timescale": 1,
"rope_max_timescale": 10000.0,
"normalization_layer_epsilon": 0.000001
},
"runtime": {
"weights_schedule": [
0, 0,
1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4
],
"max_context_steps": 1500
}
}