camenduru
/

NeMo

Model card Files Files and versions Community

NeMo / examples /asr /conf /config.yaml

camenduru's picture

thanks to NVIDIA ❤

7934b29 almost 2 years ago

history blame contribute delete

4.28 kB

	# Shared values. Each one is anchored (&...) so that the sections below can
	# reuse it through an alias (*...) instead of repeating the literal.
	name: &name "QuartzNet15x5"
	sample_rate: &sample_rate 16000
	repeat: &repeat 1
	dropout: &dropout 0.0
	separable: &separable true
	# Output vocabulary: space, the letters a-z, and the apostrophe (28 symbols).
	labels: &labels [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k",
	  "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
	  "'"]

	# Model definition: datasets, feature extraction, augmentation, encoder,
	# decoder and optimizer. Every sub-section is nested under `model`.
	model:
	  train_ds:
	    # "???" looks like OmegaConf's mandatory-value marker; presumably it must
	    # be overridden at launch — confirm against the training script.
	    manifest_filepath: ???
	    sample_rate: *sample_rate
	    labels: *labels
	    batch_size: 32
	    trim_silence: true
	    max_duration: 16.7
	    shuffle: true
	    num_workers: 8
	    pin_memory: true
	    # tarred datasets
	    is_tarred: false
	    tarred_audio_filepaths: null
	    shuffle_n: 2048
	    # bucketing params
	    bucketing_strategy: "synced_randomized"
	    bucketing_batch_size: null

	  validation_ds:
	    # Same "???" placeholder as in train_ds.
	    manifest_filepath: ???
	    sample_rate: *sample_rate
	    labels: *labels
	    batch_size: 32
	    shuffle: false
	    num_workers: 8
	    pin_memory: true

	  preprocessor:
	    _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
	    normalize: "per_feature"
	    window_size: 0.02
	    sample_rate: *sample_rate
	    window_stride: 0.01
	    window: "hann"
	    # Anchored so the encoder's feat_in can alias the same value.
	    features: &n_mels 64
	    n_fft: 512
	    frame_splicing: 1
	    dither: 0.00001
	    stft_conv: false

	  spec_augment:
	    _target_: nemo.collections.asr.modules.SpectrogramAugmentation
	    rect_freq: 50
	    rect_masks: 5
	    rect_time: 120

	  encoder:
	    _target_: nemo.collections.asr.modules.ConvASREncoder
	    feat_in: *n_mels
	    activation: relu
	    conv_mask: true

	    # One list entry per encoder block. The middle blocks take their repeat
	    # count from the shared `repeat` anchor; the first and the last two
	    # blocks pin repeat to 1.
	    jasper:
	      - filters: 128
	        repeat: 1
	        kernel: [11]
	        stride: [1]
	        dilation: [1]
	        dropout: *dropout
	        residual: true
	        separable: *separable
	        se: true
	        se_context_size: -1

	      - filters: 256
	        repeat: *repeat
	        kernel: [13]
	        stride: [1]
	        dilation: [1]
	        dropout: *dropout
	        residual: true
	        separable: *separable
	        se: true
	        se_context_size: -1

	      - filters: 256
	        repeat: *repeat
	        kernel: [15]
	        stride: [1]
	        dilation: [1]
	        dropout: *dropout
	        residual: true
	        separable: *separable
	        se: true
	        se_context_size: -1

	      - filters: 256
	        repeat: *repeat
	        kernel: [17]
	        stride: [1]
	        dilation: [1]
	        dropout: *dropout
	        residual: true
	        separable: *separable
	        se: true
	        se_context_size: -1

	      - filters: 256
	        repeat: *repeat
	        kernel: [19]
	        stride: [1]
	        dilation: [1]
	        dropout: *dropout
	        residual: true
	        separable: *separable
	        se: true
	        se_context_size: -1

	      - filters: 256
	        repeat: 1
	        kernel: [21]
	        stride: [1]
	        dilation: [1]
	        dropout: 0.0
	        residual: false
	        separable: *separable
	        se: true
	        se_context_size: -1

	      # Final block; its filter count is anchored for the decoder's feat_in.
	      - filters: &enc_feat_out 1024
	        repeat: 1
	        kernel: [1]
	        stride: [1]
	        dilation: [1]
	        dropout: 0.0
	        residual: false
	        separable: *separable
	        se: true
	        se_context_size: -1

	  decoder:
	    _target_: nemo.collections.asr.modules.ConvASRDecoder
	    # Aliased to the last encoder block's filter count (1024).
	    feat_in: *enc_feat_out
	    # Equals the number of entries in the shared labels list.
	    num_classes: 28
	    vocabulary: *labels

	  optim:
	    name: novograd
	    # _target_: nemo.core.optim.optimizers.Novograd
	    lr: 0.01
	    # optimizer arguments
	    betas: [0.8, 0.5]
	    weight_decay: 0.001

	    # scheduler setup
	    sched:
	      name: CosineAnnealing

	      # pytorch lightning args
	      monitor: val_loss
	      reduce_on_plateau: false

	      # Scheduler params
	      warmup_steps: null
	      warmup_ratio: null
	      min_lr: 0.0
	      last_epoch: -1

	# Trainer options; all keys are nested under `trainer`.
	trainer:
	  devices: 1  # number of gpus
	  max_epochs: 5
	  max_steps: -1  # computed at runtime if not set
	  num_nodes: 1
	  accelerator: gpu
	  strategy: ddp
	  accumulate_grad_batches: 1
	  enable_checkpointing: false  # provided by exp_manager
	  logger: false  # provided by exp_manager
	  log_every_n_steps: 1  # interval of logging
	  # 1.0 checks once per epoch; 0.25 would check 4 times per epoch.
	  val_check_interval: 1.0
	  # Needs to be false for models with variable-length speech input, as it
	  # slows down training.
	  benchmark: false

	# Experiment-manager options; all keys are nested under `exp_manager`.
	exp_manager:
	  exp_dir: null
	  # Reuses the top-level experiment name through its anchor.
	  name: *name
	  create_tensorboard_logger: true
	  create_checkpoint_callback: true