| | batch_size: 16 |
| | accum_grad: 64 |
| | max_epoch: 135 |
| | patience: none |
| | |
| | init: xavier_uniform |
| | best_model_criterion: |
| | - - valid |
| | - acc_asr |
| | - max |
| | keep_nbest_models: 10 |
| |
|
| | encoder: conformer |
| | encoder_conf: |
| | output_size: 256 |
| | attention_heads: 4 |
| | linear_units: 2048 |
| | num_blocks: 12 |
| | dropout_rate: 0.1 |
| | positional_dropout_rate: 0.1 |
| | attention_dropout_rate: 0.0 |
| | input_layer: conv2d |
| | normalize_before: true |
| | macaron_style: true |
| | rel_pos_type: latest |
| | pos_enc_layer_type: rel_pos |
| | selfattention_layer_type: rel_selfattn |
| | activation_type: swish |
| | use_cnn_module: true |
| | cnn_module_kernel: 31 |
| | interctc_layer_idx: [6] |
| |
|
| | decoder: transformer |
| | decoder_conf: |
| | attention_heads: 4 |
| | linear_units: 2048 |
| | num_blocks: 6 |
| | dropout_rate: 0.1 |
| | positional_dropout_rate: 0.1 |
| | self_attention_dropout_rate: 0.0 |
| | src_attention_dropout_rate: 0.0 |
| | return_hidden: true |
| |
|
| | subtitle_encoder: conformer |
| | subtitle_encoder_conf: |
| | output_size: 256 |
| | attention_heads: 4 |
| | linear_units: 2048 |
| | num_blocks: 2 |
| | dropout_rate: 0.1 |
| | positional_dropout_rate: 0.1 |
| | attention_dropout_rate: 0.0 |
| | input_layer: none |
| | normalize_before: true |
| | macaron_style: true |
| | rel_pos_type: latest |
| | pos_enc_layer_type: rel_pos |
| | selfattention_layer_type: rel_selfattn |
| | activation_type: swish |
| | use_cnn_module: true |
| | cnn_module_kernel: 31 |
| |
|
| | subtitle_decoder: multi_transformer |
| | subtitle_decoder_conf: |
| | attention_heads: 4 |
| | linear_units: 2048 |
| | num_blocks: 6 |
| | dropout_rate: 0.1 |
| | positional_dropout_rate: 0.1 |
| | self_attention_dropout_rate: 0.0 |
| | src_attention_dropout_rate: 0.0 |
| |
|
| | model_conf: |
| | asr_weight: 0.5 |
| | subs_weight: 0.5 |
| | ctc_weight: 0.3 |
| | interctc_weight: 0.3 |
| | lsm_weight_asr: 0.1 |
| | lsm_weight_mt: 0.1 |
| | length_normalized_loss: false |
| | condition_subtitle_decoder: true |
| | use_asr_feats: "encoder" |
| |
|
| | optim: adam |
| | optim_conf: |
| | lr: 0.004 |
| | scheduler: warmuplr |
| | scheduler_conf: |
| | warmup_steps: 25000 |
| |
|
| | specaug: specaug |
| | specaug_conf: |
| | apply_time_warp: true |
| | time_warp_window: 5 |
| | time_warp_mode: bicubic |
| | apply_freq_mask: true |
| | freq_mask_width_range: |
| | - 0 |
| | - 30 |
| | num_freq_mask: 2 |
| | apply_time_mask: true |
| | time_mask_width_range: |
| | - 0 |
| | - 40 |
| | num_time_mask: 2 |
| |
|
| |
|