nari-labs / Dia2-2B

Model card | Files and versions

Dia2-2B / config.json

NariLabs's picture

Trim config to runtime essentials

6a8c5a0 verified 12 days ago

1.44 kB

	{
	"data": {
	"channels": 34,
	"text_vocab_size": 49280,
	"audio_vocab_size": 2050,
	"action_vocab_size": 2,
	"text_pad_token_id": 3,
	"text_new_word_token_id": 2,
	"text_zero_token_id": 7,
	"audio_pad_token_id": 2049,
	"audio_bos_token_id": 2048,
	"action_pad_token_id": 0,
	"action_new_word_token_id": 1,
	"delay_pattern": [
	16, 18, 18, 18, 18, 18, 18, 18,
	18, 18, 18, 18, 18, 18, 18, 18,
	18, 18, 18, 18, 18, 18, 18, 18,
	18, 18, 18, 18, 18, 18, 18, 18
	],
	"first_word_min_start": 3,
	"max_pad": 8,
	"second_stream_ahead": 2
	},
	"model": {
	"decoder": {
	"n_layer": 28,
	"n_embd": 2048,
	"n_hidden": 6144,
	"gqa_query_heads": 16,
	"kv_heads": 8,
	"gqa_head_dim": 128
	},
	"depformer": {
	"n_layer": 4,
	"n_embd": 1024,
	"n_hidden": 3072,
	"gqa_query_heads": 8,
	"kv_heads": 8,
	"gqa_head_dim": 128,
	"apply_rope": true,
	"text_embedding": false
	},
	"linear": {
	"mlp_activations": [
	"silu",
	"linear"
	]
	},
	"dropout": 0.0,
	"rope_min_timescale": 1,
	"rope_max_timescale": 10000.0,
	"normalization_layer_epsilon": 0.000001
	},
	"runtime": {
	"weights_schedule": [
	0, 0,
	1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4
	],
	"max_context_steps": 1500
	}
	}