# NeMo/examples/asr/conf/contextnet_rnnt/config_rnnt_bpe.yaml

# Thanks to NVIDIA ❤

# 7934b29 almost 2 years ago

# 7.19 kB

	name: &name "ConvRNNTBPE5x1"

	model:
	sample_rate: 16000
	compute_eval_loss: true

	tokenizer:
	dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe)
	type: ??? # Can be either bpe or wpe

	train_ds:
	manifest_filepath: ???
	sample_rate: ${model.sample_rate}
	batch_size: 32
	trim_silence: true
	max_duration: 16.7
	labels: []
	shuffle: true
	num_workers: 8
	pin_memory: true
	# tarred datasets
	is_tarred: false
	tarred_audio_filepaths: null
	shuffle_n: 2048
	# bucketing params
	bucketing_strategy: "synced_randomized"
	bucketing_batch_size: null

	validation_ds:
	manifest_filepath: ???
	sample_rate: ${model.sample_rate}
	batch_size: 32
	shuffle: false
	labels: []
	num_workers: 8
	pin_memory: true

	test_ds:
	manifest_filepath: null
	sample_rate: ${model.sample_rate}
	batch_size: 32
	shuffle: false
	labels: []
	num_workers: 8
	pin_memory: true

	model_defaults:
	repeat: 5
	dropout: 0.0
	separable: true
	se: true
	se_context_size: -1
	# encoder / decoder / joint values
	enc_hidden: 1024
	pred_hidden: 320
	joint_hidden: 320

	preprocessor:
	_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
	normalize: "per_feature"
	window_size: 0.02
	sample_rate: ${model.sample_rate}
	window_stride: 0.01
	window: "hann"
	features: &n_mels 80
	n_fft: 512
	frame_splicing: 1
	dither: 0.00001

	spec_augment:
	_target_: nemo.collections.asr.modules.SpectrogramAugmentation
	rect_freq: 50
	rect_masks: 5
	rect_time: 120

	encoder:
	_target_: nemo.collections.asr.modules.ConvASREncoder
	feat_in: *n_mels
	activation: relu
	conv_mask: true

	jasper:
	- filters: 128
	repeat: 1
	kernel: [11]
	stride: [1]
	dilation: [1]
	dropout: ${model.model_defaults.dropout}
	residual: true
	separable: ${model.model_defaults.separable}
	se: ${model.model_defaults.se}
	se_context_size: ${model.model_defaults.se_context_size}

	- filters: 256
	repeat: ${model.model_defaults.repeat}
	kernel: [13]
	stride: [2]
	dilation: [1]
	dropout: ${model.model_defaults.dropout}
	residual: true
	separable: ${model.model_defaults.separable}
	se: ${model.model_defaults.se}
	se_context_size: ${model.model_defaults.se_context_size}
	stride_last: true
	residual_mode: "stride_add"

	- filters: 256
	repeat: ${model.model_defaults.repeat}
	kernel: [15]
	stride: [1]
	dilation: [1]
	dropout: ${model.model_defaults.dropout}
	residual: true
	separable: ${model.model_defaults.separable}
	se: ${model.model_defaults.se}
	se_context_size: ${model.model_defaults.se_context_size}

	- filters: 256
	repeat: ${model.model_defaults.repeat}
	kernel: [17]
	stride: [2]
	dilation: [1]
	dropout: ${model.model_defaults.dropout}
	residual: true
	separable: ${model.model_defaults.separable}
	se: ${model.model_defaults.se}
	se_context_size: ${model.model_defaults.se_context_size}
	stride_last: true
	residual_mode: "stride_add"

	- filters: 256
	repeat: ${model.model_defaults.repeat}
	kernel: [19]
	stride: [1]
	dilation: [1]
	dropout: ${model.model_defaults.dropout}
	residual: true
	separable: ${model.model_defaults.separable}
	se: ${model.model_defaults.se}
	se_context_size: ${model.model_defaults.se_context_size}

	- filters: 256
	repeat: 1
	kernel: [21]
	stride: [2]
	dilation: [1]
	dropout: 0.0
	residual: false
	separable: ${model.model_defaults.separable}
	se: ${model.model_defaults.se}
	se_context_size: ${model.model_defaults.se_context_size}
	stride_last: true
	residual_mode: "stride_add"

	- filters: ${model.model_defaults.enc_hidden}
	repeat: 1
	kernel: [1]
	stride: [1]
	dilation: [1]
	dropout: 0.0
	residual: false
	separable: ${model.model_defaults.separable}
	se: ${model.model_defaults.se}
	se_context_size: ${model.model_defaults.se_context_size}

	decoder:
	_target_: nemo.collections.asr.modules.RNNTDecoder
	normalization_mode: null
	random_state_sampling: false
	blank_as_pad: true

	prednet:
	pred_hidden: ${model.model_defaults.pred_hidden}
	pred_rnn_layers: 1
	t_max: null
	dropout: 0.0

	joint:
	_target_: nemo.collections.asr.modules.RNNTJoint
	log_softmax: null # sets it according to cpu/gpu device

	# fused mode
	fuse_loss_wer: false
	fused_batch_size: 1

	jointnet:
	joint_hidden: ${model.model_defaults.joint_hidden}
	activation: "relu"
	dropout: 0.0

	decoding:
	strategy: "greedy_batch"

	# greedy strategy config
	greedy:
	max_symbols: 10

	# beam strategy config
	beam:
	beam_size: 2
	score_norm: true
	softmax_temperature: 1.0 # scale the logits by some temperature prior to softmax
	tsd_max_sym_exp: 10 # for Time Synchronous Decoding, int > 0
	alsd_max_target_len: 5.0 # for Alignment-Length Synchronous Decoding, float > 1.0
	maes_num_steps: 2 # for modified Adaptive Expansion Search, int > 0
	maes_prefix_alpha: 1 # for modified Adaptive Expansion Search, int > 0
	maes_expansion_beta: 2 # for modified Adaptive Expansion Search, int >= 0
	maes_expansion_gamma: 2.3 # for modified Adaptive Expansion Search, float >= 0

	loss:
	loss_name: "default"
	warprnnt_numba_kwargs:
	fastemit_lambda: 0.0 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start.
	clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only.

	optim:
	name: adam
	# _target_: nemo.core.optim.optimizers.Adam
	lr: .1

	# optimizer arguments
	betas: [0.9, 0.999]
	weight_decay: 0.0001

	# scheduler setup
	sched:
	name: CosineAnnealing

	# scheduler config override
	warmup_steps: null
	warmup_ratio: 0.05
	min_lr: 1e-6
	last_epoch: -1

	trainer:
	devices: 1 # number of gpus
	max_epochs: 5
	max_steps: -1 # computed at runtime if not set
	num_nodes: 1
	accelerator: gpu
	strategy: ddp
	precision: 32
	accumulate_grad_batches: 1
	enable_checkpointing: False # Provided by exp_manager
	logger: False # Provided by exp_manager
	log_every_n_steps: 1 # Interval of logging.
	val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
	benchmark: false # needs to be false for models with variable-length speech input as it slows down training

	exp_manager:
	exp_dir: null
	name: *name
	create_tensorboard_logger: True
	create_checkpoint_callback: True
	checkpoint_callback_params:
	monitor: "val_wer"
	mode: "min"
	create_wandb_logger: False
	wandb_logger_kwargs:
	name: null
	project: null