# Autogenerated by nbdev

d = {
    'settings': {
        'branch': 'master',
        'doc_baseurl': '/WhisperSpeech',
        'doc_host': 'https://collabora.github.io',
        'git_url': 'https://github.com/collabora/WhisperSpeech',
        'lib_path': 'whisperspeech'},
    'syms': {
        'whisperspeech.a2wav': {
            'whisperspeech.a2wav.Vocoder': ('6. quality-boosting vocoder.html#vocoder', 'whisperspeech/a2wav.py'),
            'whisperspeech.a2wav.Vocoder.__init__': ('6. quality-boosting vocoder.html#vocoder.__init__', 'whisperspeech/a2wav.py'),
            'whisperspeech.a2wav.Vocoder.decode': ('6. quality-boosting vocoder.html#vocoder.decode', 'whisperspeech/a2wav.py'),
            'whisperspeech.a2wav.Vocoder.decode_to_file': ('6. quality-boosting vocoder.html#vocoder.decode_to_file', 'whisperspeech/a2wav.py'),
            'whisperspeech.a2wav.Vocoder.decode_to_notebook': ('6. quality-boosting vocoder.html#vocoder.decode_to_notebook', 'whisperspeech/a2wav.py')},
        'whisperspeech.extract_acoustic': {
            'whisperspeech.extract_acoustic.extract_Atoks': ('1. acoustic token extraction.html#extract_atoks', 'whisperspeech/extract_acoustic.py'),
            'whisperspeech.extract_acoustic.extract_acoustic': ('1. acoustic token extraction.html#extract_acoustic', 'whisperspeech/extract_acoustic.py'),
            'whisperspeech.extract_acoustic.load': ('1. acoustic token extraction.html#load', 'whisperspeech/extract_acoustic.py'),
            'whisperspeech.extract_acoustic.load_model': ('1. acoustic token extraction.html#load_model', 'whisperspeech/extract_acoustic.py')},
        'whisperspeech.extract_semb': {
            'whisperspeech.extract_semb.encode_semantic': ('2c. whisper semantic embedding extraction.html#encode_semantic', 'whisperspeech/extract_semb.py'),
            'whisperspeech.extract_semb.extract_semantic': ('2c. whisper semantic embedding extraction.html#extract_semantic', 'whisperspeech/extract_semb.py'),
            'whisperspeech.extract_semb.load_model': ('2c. whisper semantic embedding extraction.html#load_model', 'whisperspeech/extract_semb.py')},
        'whisperspeech.fetch_models': {
            'whisperspeech.fetch_models.main': ('0. download models.html#main', 'whisperspeech/fetch_models.py')},
        'whisperspeech.modules': {
            'whisperspeech.modules.Decoder': ('a. neural modules.html#decoder', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Decoder.__init__': ('a. neural modules.html#decoder.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Decoder.forward': ('a. neural modules.html#decoder.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Encoder': ('a. neural modules.html#encoder', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Encoder.__init__': ('a. neural modules.html#encoder.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Encoder.forward': ('a. neural modules.html#encoder.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.LayerNorm': ('a. neural modules.html#layernorm', 'whisperspeech/modules.py'),
            'whisperspeech.modules.LayerNorm.forward': ('a. neural modules.html#layernorm.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.LinearHead': ('a. neural modules.html#linearhead', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention': ('a. neural modules.html#multiheadattention', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.__init__': ('a. neural modules.html#multiheadattention.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.forward': ('a. neural modules.html#multiheadattention.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.qkv_attention_pth20': ('a. neural modules.html#multiheadattention.qkv_attention_pth20', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.qkv_attention_vanilla': ('a. neural modules.html#multiheadattention.qkv_attention_vanilla', 'whisperspeech/modules.py'),
            'whisperspeech.modules.MultiHeadAttention.qkv_attention_xformers': ('a. neural modules.html#multiheadattention.qkv_attention_xformers', 'whisperspeech/modules.py'),
            'whisperspeech.modules.QueryHead': ('a. neural modules.html#queryhead', 'whisperspeech/modules.py'),
            'whisperspeech.modules.ResidualAttentionBlock': ('a. neural modules.html#residualattentionblock', 'whisperspeech/modules.py'),
            'whisperspeech.modules.ResidualAttentionBlock.__init__': ('a. neural modules.html#residualattentionblock.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.ResidualAttentionBlock.forward': ('a. neural modules.html#residualattentionblock.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Rotary': ('a. neural modules.html#rotary', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Rotary.__init__': ('a. neural modules.html#rotary.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.Rotary.forward': ('a. neural modules.html#rotary.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.SumDecoder': ('a. neural modules.html#sumdecoder', 'whisperspeech/modules.py'),
            'whisperspeech.modules.SumDecoder.__init__': ('a. neural modules.html#sumdecoder.__init__', 'whisperspeech/modules.py'),
            'whisperspeech.modules.SumDecoder.forward': ('a. neural modules.html#sumdecoder.forward', 'whisperspeech/modules.py'),
            'whisperspeech.modules.apply_rotary_pos_emb': ('a. neural modules.html#apply_rotary_pos_emb', 'whisperspeech/modules.py'),
            'whisperspeech.modules.init_transformer': ('a. neural modules.html#init_transformer', 'whisperspeech/modules.py'),
            'whisperspeech.modules.rotate_half': ('a. neural modules.html#rotate_half', 'whisperspeech/modules.py'),
            'whisperspeech.modules.sinusoids': ('a. neural modules.html#sinusoids', 'whisperspeech/modules.py')},
        'whisperspeech.pipeline': {
            'whisperspeech.pipeline.Pipeline': ('7. pipeline.html#pipeline', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.__init__': ('7. pipeline.html#pipeline.__init__', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.generate': ('7. pipeline.html#pipeline.generate', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.generate_atoks': ('7. pipeline.html#pipeline.generate_atoks', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.generate_to_file': ('7. pipeline.html#pipeline.generate_to_file', 'whisperspeech/pipeline.py'),
            'whisperspeech.pipeline.Pipeline.generate_to_notebook': ('7. pipeline.html#pipeline.generate_to_notebook', 'whisperspeech/pipeline.py')},
        'whisperspeech.prepare_s2a_dataset': {
            'whisperspeech.prepare_s2a_dataset.flac_to_s2a_name': ('4a. s2a dataset preparation.html#flac_to_s2a_name', 'whisperspeech/prepare_s2a_dataset.py'),
            'whisperspeech.prepare_s2a_dataset.prepare_s2a': ('4a. s2a dataset preparation.html#prepare_s2a', 'whisperspeech/prepare_s2a_dataset.py'),
            'whisperspeech.prepare_s2a_dataset.resampler': ('4a. s2a dataset preparation.html#resampler', 'whisperspeech/prepare_s2a_dataset.py')},
        'whisperspeech.prepare_t2s_dataset': {
            'whisperspeech.prepare_t2s_dataset.Transcriber': ('5a. t2s dataset preparation.html#transcriber', 'whisperspeech/prepare_t2s_dataset.py'),
            'whisperspeech.prepare_t2s_dataset.Transcriber.__init__': ('5a. t2s dataset preparation.html#transcriber.__init__', 'whisperspeech/prepare_t2s_dataset.py'),
            'whisperspeech.prepare_t2s_dataset.Transcriber.transcribe': ('5a. t2s dataset preparation.html#transcriber.transcribe', 'whisperspeech/prepare_t2s_dataset.py'),
            'whisperspeech.prepare_t2s_dataset.flac_to_t2s_name': ('5a. t2s dataset preparation.html#flac_to_t2s_name', 'whisperspeech/prepare_t2s_dataset.py'),
            'whisperspeech.prepare_t2s_dataset.prepare_t2s': ('5a. t2s dataset preparation.html#prepare_t2s', 'whisperspeech/prepare_t2s_dataset.py')},
        'whisperspeech.s2a_delar_mup_wds': {
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual': ('4b. semantic to acoustic token modeling.html#cmlmvisual', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.__init__': ('4b. semantic to acoustic token modeling.html#cmlmvisual.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.add_data': ('4b. semantic to acoustic token modeling.html#cmlmvisual.add_data', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.add_table_row': ('4b. semantic to acoustic token modeling.html#cmlmvisual.add_table_row', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.hide': ('4b. semantic to acoustic token modeling.html#cmlmvisual.hide', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.on_iter': ('4b. semantic to acoustic token modeling.html#cmlmvisual.on_iter', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.plot': ('4b. semantic to acoustic token modeling.html#cmlmvisual.plot', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.CMLMVisual.show': ('4b. semantic to acoustic token modeling.html#cmlmvisual.show', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.DelSumDecoder': ('4b. semantic to acoustic token modeling.html#delsumdecoder', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.DelSumDecoder.__init__': ('4b. semantic to acoustic token modeling.html#delsumdecoder.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.DelSumDecoder.forward': ('4b. semantic to acoustic token modeling.html#delsumdecoder.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.EmbeddingProjector': ('4b. semantic to acoustic token modeling.html#embeddingprojector', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention': ('4b. semantic to acoustic token modeling.html#multiheadattention', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.__init__': ('4b. semantic to acoustic token modeling.html#multiheadattention.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.forward': ('4b. semantic to acoustic token modeling.html#multiheadattention.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.qkv_attention_pth20': ('4b. semantic to acoustic token modeling.html#multiheadattention.qkv_attention_pth20', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.qkv_attention_xformers': ('4b. semantic to acoustic token modeling.html#multiheadattention.qkv_attention_xformers', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock': ('4b. semantic to acoustic token modeling.html#residualattentionblock', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock.__init__': ('4b. semantic to acoustic token modeling.html#residualattentionblock.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock.forward': ('4b. semantic to acoustic token modeling.html#residualattentionblock.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Rotary': ('4b. semantic to acoustic token modeling.html#rotary', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Rotary.__init__': ('4b. semantic to acoustic token modeling.html#rotary.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Rotary.forward': ('4b. semantic to acoustic token modeling.html#rotary.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer': ('4b. semantic to acoustic token modeling.html#sadelartransformer', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.__init__': ('4b. semantic to acoustic token modeling.html#sadelartransformer.__init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.device': ('4b. semantic to acoustic token modeling.html#sadelartransformer.device', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.embed_stoks': ('4b. semantic to acoustic token modeling.html#sadelartransformer.embed_stoks', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.forward': ('4b. semantic to acoustic token modeling.html#sadelartransformer.forward', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.generate': ('4b. semantic to acoustic token modeling.html#sadelartransformer.generate', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.get_extra_state': ('4b. semantic to acoustic token modeling.html#sadelartransformer.get_extra_state', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.get_metrics': ('4b. semantic to acoustic token modeling.html#sadelartransformer.get_metrics', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.init_transformer': ('4b. semantic to acoustic token modeling.html#sadelartransformer.init_transformer', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_checkpoint': ('4b. semantic to acoustic token modeling.html#sadelartransformer.load_checkpoint', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_frozen_semantic_embeddings': ('4b. semantic to acoustic token modeling.html#sadelartransformer.load_frozen_semantic_embeddings', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_model': ('4b. semantic to acoustic token modeling.html#sadelartransformer.load_model', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.save_model': ('4b. semantic to acoustic token modeling.html#sadelartransformer.save_model', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.set_extra_state': ('4b. semantic to acoustic token modeling.html#sadelartransformer.set_extra_state', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.setup': ('4b. semantic to acoustic token modeling.html#sadelartransformer.setup', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Tunables': ('4b. semantic to acoustic token modeling.html#tunables', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Tunables.__post_init__': ('4b. semantic to acoustic token modeling.html#tunables.__post_init__', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.Tunables.upgrade': ('4b. semantic to acoustic token modeling.html#tunables.upgrade', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds._make_model': ('4b. semantic to acoustic token modeling.html#_make_model', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.apply_rotary_pos_emb': ('4b. semantic to acoustic token modeling.html#apply_rotary_pos_emb', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.load_datasets': ('4b. semantic to acoustic token modeling.html#load_datasets', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.make_model': ('4b. semantic to acoustic token modeling.html#make_model', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.pad_samples': ('4b. semantic to acoustic token modeling.html#pad_samples', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.rand': ('4b. semantic to acoustic token modeling.html#rand', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.random_trunc': ('4b. semantic to acoustic token modeling.html#random_trunc', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.rotate_half': ('4b. semantic to acoustic token modeling.html#rotate_half', 'whisperspeech/s2a_delar_mup_wds.py'),
            'whisperspeech.s2a_delar_mup_wds.speaker_id_extractor': ('4b. semantic to acoustic token modeling.html#speaker_id_extractor', 'whisperspeech/s2a_delar_mup_wds.py')},
        'whisperspeech.t2s_up_wds': {
            'whisperspeech.t2s_up_wds.CharTokenizer': ('5b. text to semantic token modeling.html#chartokenizer', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.CharTokenizer.decode': ('5b. text to semantic token modeling.html#chartokenizer.decode', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.CharTokenizer.encode': ('5b. text to semantic token modeling.html#chartokenizer.encode', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Decoder': ('5b. text to semantic token modeling.html#decoder', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Decoder.__init__': ('5b. text to semantic token modeling.html#decoder.__init__', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Decoder.forward': ('5b. text to semantic token modeling.html#decoder.forward', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.EmbeddingProjector': ('5b. text to semantic token modeling.html#embeddingprojector', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Encoder': ('5b. text to semantic token modeling.html#encoder', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Encoder.__init__': ('5b. text to semantic token modeling.html#encoder.__init__', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Encoder.forward': ('5b. text to semantic token modeling.html#encoder.forward', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer': ('5b. text to semantic token modeling.html#tsartransformer', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.__init__': ('5b. text to semantic token modeling.html#tsartransformer.__init__', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.device': ('5b. text to semantic token modeling.html#tsartransformer.device', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.ensure_tokenizer': ('5b. text to semantic token modeling.html#tsartransformer.ensure_tokenizer', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.forward': ('5b. text to semantic token modeling.html#tsartransformer.forward', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.generate': ('5b. text to semantic token modeling.html#tsartransformer.generate', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.generate_batch': ('5b. text to semantic token modeling.html#tsartransformer.generate_batch', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.init_transformer': ('5b. text to semantic token modeling.html#tsartransformer.init_transformer', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.load_checkpoint': ('5b. text to semantic token modeling.html#tsartransformer.load_checkpoint', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.load_frozen_semantic_embeddings': ('5b. text to semantic token modeling.html#tsartransformer.load_frozen_semantic_embeddings', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.load_model': ('5b. text to semantic token modeling.html#tsartransformer.load_model', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.save_model': ('5b. text to semantic token modeling.html#tsartransformer.save_model', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.TSARTransformer.setup': ('5b. text to semantic token modeling.html#tsartransformer.setup', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Tunables': ('5b. text to semantic token modeling.html#tunables', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.Tunables.__post_init__': ('5b. text to semantic token modeling.html#tunables.__post_init__', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds._make_model': ('5b. text to semantic token modeling.html#_make_model', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.ar_padder': ('5b. text to semantic token modeling.html#ar_padder', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.build_speaker_map': ('5b. text to semantic token modeling.html#build_speaker_map', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.char_per_seconder': ('5b. text to semantic token modeling.html#char_per_seconder', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.load_datasets': ('5b. text to semantic token modeling.html#load_datasets', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.make_model': ('5b. text to semantic token modeling.html#make_model', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.rand': ('5b. text to semantic token modeling.html#rand', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.speaker_id_extractor': ('5b. text to semantic token modeling.html#speaker_id_extractor', 'whisperspeech/t2s_up_wds.py'),
            'whisperspeech.t2s_up_wds.tokenizer': ('5b. text to semantic token modeling.html#tokenizer', 'whisperspeech/t2s_up_wds.py')},
        'whisperspeech.train': {
            'whisperspeech.train.SimpleVisual': ('b1. training.html#simplevisual', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.__init__': ('b1. training.html#simplevisual.__init__', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.add_data': ('b1. training.html#simplevisual.add_data', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.add_table_row': ('b1. training.html#simplevisual.add_table_row', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.hide': ('b1. training.html#simplevisual.hide', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.on_iter': ('b1. training.html#simplevisual.on_iter', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.plot': ('b1. training.html#simplevisual.plot', 'whisperspeech/train.py'),
            'whisperspeech.train.SimpleVisual.show': ('b1. training.html#simplevisual.show', 'whisperspeech/train.py'),
            'whisperspeech.train.train': ('b1. training.html#train', 'whisperspeech/train.py'),
            'whisperspeech.train.validate': ('b1. training.html#validate', 'whisperspeech/train.py')},
        'whisperspeech.train_multi': {
            'whisperspeech.train_multi.TrainingTask': ('b2. training (lightning).html#trainingtask', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.__init__': ('b2. training (lightning).html#trainingtask.__init__', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.configure_optimizers': ('b2. training (lightning).html#trainingtask.configure_optimizers', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.on_fit_start': ('b2. training (lightning).html#trainingtask.on_fit_start', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.on_validation_epoch_end': ('b2. training (lightning).html#trainingtask.on_validation_epoch_end', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.test_step': ('b2. training (lightning).html#trainingtask.test_step', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.training_step': ('b2. training (lightning).html#trainingtask.training_step', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.TrainingTask.validation_step': ('b2. training (lightning).html#trainingtask.validation_step', 'whisperspeech/train_multi.py'),
            'whisperspeech.train_multi.parse_and_call': ('b2. training (lightning).html#parse_and_call', 'whisperspeech/train_multi.py')},
        'whisperspeech.vad': {
            'whisperspeech.vad.extract_segments': ('1b. voice activity detection.html#extract_segments', 'whisperspeech/vad.py'),
            'whisperspeech.vad.fix_dots_in_names': ('1b. voice activity detection.html#fix_dots_in_names', 'whisperspeech/vad.py'),
            'whisperspeech.vad.flac_to_vad_name': ('1b. voice activity detection.html#flac_to_vad_name', 'whisperspeech/vad.py'),
            'whisperspeech.vad.load_dataset': ('1b. voice activity detection.html#load_dataset', 'whisperspeech/vad.py'),
            'whisperspeech.vad.process_shard': ('1b. voice activity detection.html#process_shard', 'whisperspeech/vad.py'),
            'whisperspeech.vad.segment_audio': ('1b. voice activity detection.html#segment_audio', 'whisperspeech/vad.py')},
        'whisperspeech.verify_wds': {
            'whisperspeech.verify_wds.process_shard': ('0. verify webdataset archives.html#process_shard', 'whisperspeech/verify_wds.py')},
        'whisperspeech.vq_stoks': {
            'whisperspeech.vq_stoks.RQBottleneckTransformer': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.__init__': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.__init__', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.decode_text': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.decode_text', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.dequantize': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.dequantize', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.device': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.device', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.downsample_embeddings': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.downsample_embeddings', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.encode_audio': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.encode_audio', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.encode_mel': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.encode_mel', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.ensure_whisper': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.ensure_whisper', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.extract_teacher': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.extract_teacher', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.forward': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.forward', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.get_metrics': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.get_metrics', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.init_transformer': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.init_transformer', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.load_checkpoint': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.load_checkpoint', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.load_model': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.load_model', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.quantize': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.quantize', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.save_model': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.save_model', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.RQBottleneckTransformer.setup': ('2b. whisper quantization (semantic token) model.html#rqbottlenecktransformer.setup', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.Tunables': ('2b. whisper quantization (semantic token) model.html#tunables', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.Tunables.__post_init__': ('2b. whisper quantization (semantic token) model.html#tunables.__post_init__', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.Tunables.upgrade': ('2b. whisper quantization (semantic token) model.html#tunables.upgrade', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.add_masks': ('2b. whisper quantization (semantic token) model.html#add_masks', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.derived_dataset': ('2b. whisper quantization (semantic token) model.html#derived_dataset', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.load_datasets': ('2b. whisper quantization (semantic token) model.html#load_datasets', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.logrand': ('2b. whisper quantization (semantic token) model.html#logrand', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.make_model': ('2b. whisper quantization (semantic token) model.html#make_model', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.merge_in': ('2b. whisper quantization (semantic token) model.html#merge_in', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.rand': ('2b. whisper quantization (semantic token) model.html#rand', 'whisperspeech/vq_stoks.py'),
            'whisperspeech.vq_stoks.tokenize_text': ('2b. whisper quantization (semantic token) model.html#tokenize_text', 'whisperspeech/vq_stoks.py')},
        'whisperspeech.wer_metrics': {
            'whisperspeech.wer_metrics.DfBuilder': ('c. word error rate metrics.html#dfbuilder', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.DfBuilder.__init__': ('c. word error rate metrics.html#dfbuilder.__init__', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.DfBuilder.df': ('c. word error rate metrics.html#dfbuilder.df', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.DfBuilder.push': ('c. word error rate metrics.html#dfbuilder.push', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.WERStats': ('c. word error rate metrics.html#werstats', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.WERStats.__init__': ('c. word error rate metrics.html#werstats.__init__', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.WERStats.push_sample': ('c. word error rate metrics.html#werstats.push_sample', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.librispeech_data': ('c. word error rate metrics.html#librispeech_data', 'whisperspeech/wer_metrics.py'),
            'whisperspeech.wer_metrics.whisper_normalize': ('c. word error rate metrics.html#whisper_normalize', 'whisperspeech/wer_metrics.py')},
        'whisperspeech.wh_transcribe': {
            'whisperspeech.wh_transcribe.chunk_merger': ('2a. whisper quantization dataset preparation.html#chunk_merger', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.flac_to_txt_name': ('2a. whisper quantization dataset preparation.html#flac_to_txt_name', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.merge_in': ('2a. whisper quantization dataset preparation.html#merge_in', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.process_shard': ('2a. whisper quantization dataset preparation.html#process_shard', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.random_cutter': ('2a. whisper quantization dataset preparation.html#random_cutter', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.split_to_chunks': ('2a. whisper quantization dataset preparation.html#split_to_chunks', 'whisperspeech/wh_transcribe.py'),
            'whisperspeech.wh_transcribe.wds_compose': ('2a. whisper quantization dataset preparation.html#wds_compose', 'whisperspeech/wh_transcribe.py')}}}
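# --- Illustrative usage sketch (not part of the nbdev-generated index) ---
# The index above maps each module in `d['syms']` to a dict of fully-qualified
# symbols, and each symbol to a (documentation-page anchor, source-file path)
# pair, while `d['settings']` carries the documentation host and base URL.
# The helper below is a hypothetical example added for illustration only; it is
# not part of nbdev's API and would be overwritten when nbdev regenerates this file.
def _example_doc_link(symbol: str):
    """Return (doc_url, source_path) for a fully-qualified symbol, or None if unknown."""
    for module_symbols in d['syms'].values():
        if symbol in module_symbols:
            page_anchor, source_path = module_symbols[symbol]
            doc_url = f"{d['settings']['doc_host']}{d['settings']['doc_baseurl']}/{page_anchor}"
            return doc_url, source_path
    return None

# Example lookup (assuming this index):
#   _example_doc_link('whisperspeech.pipeline.Pipeline.generate')
#   -> ('https://collabora.github.io/WhisperSpeech/7. pipeline.html#pipeline.generate',
#       'whisperspeech/pipeline.py')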