# Autogenerated by nbdev
d = { 'settings': { 'branch': 'master',
'doc_baseurl': '/WhisperSpeech',
'doc_host': 'https://collabora.github.io',
'git_url': 'https://github.com/collabora/WhisperSpeech',
'lib_path': 'whisperspeech'},
'syms': { 'whisperspeech.a2wav': { 'whisperspeech.a2wav.Vocoder': ('6. quality-boosting vocoder.html#vocoder', 'whisperspeech/a2wav.py'),
'whisperspeech.a2wav.Vocoder.__init__': ( '6. quality-boosting vocoder.html#vocoder.__init__',
'whisperspeech/a2wav.py'),
'whisperspeech.a2wav.Vocoder.decode': ( '6. quality-boosting vocoder.html#vocoder.decode',
'whisperspeech/a2wav.py'),
'whisperspeech.a2wav.Vocoder.decode_to_file': ( '6. quality-boosting '
'vocoder.html#vocoder.decode_to_file',
'whisperspeech/a2wav.py'),
'whisperspeech.a2wav.Vocoder.decode_to_notebook': ( '6. quality-boosting '
'vocoder.html#vocoder.decode_to_notebook',
'whisperspeech/a2wav.py')},
'whisperspeech.extract_acoustic': { 'whisperspeech.extract_acoustic.extract_Atoks': ( '1. acoustic token '
'extraction.html#extract_atoks',
'whisperspeech/extract_acoustic.py'),
'whisperspeech.extract_acoustic.extract_acoustic': ( '1. acoustic token '
'extraction.html#extract_acoustic',
'whisperspeech/extract_acoustic.py'),
'whisperspeech.extract_acoustic.load': ( '1. acoustic token extraction.html#load',
'whisperspeech/extract_acoustic.py'),
'whisperspeech.extract_acoustic.load_model': ( '1. acoustic token '
'extraction.html#load_model',
'whisperspeech/extract_acoustic.py')},
'whisperspeech.extract_semb': { 'whisperspeech.extract_semb.encode_semantic': ( '2c. whisper semantic embedding '
'extraction.html#encode_semantic',
'whisperspeech/extract_semb.py'),
'whisperspeech.extract_semb.extract_semantic': ( '2c. whisper semantic embedding '
'extraction.html#extract_semantic',
'whisperspeech/extract_semb.py'),
'whisperspeech.extract_semb.load_model': ( '2c. whisper semantic embedding '
'extraction.html#load_model',
'whisperspeech/extract_semb.py')},
'whisperspeech.fetch_models': { 'whisperspeech.fetch_models.main': ( '0. download models.html#main',
'whisperspeech/fetch_models.py')},
'whisperspeech.modules': { 'whisperspeech.modules.Decoder': ('a. neural modules.html#decoder', 'whisperspeech/modules.py'),
'whisperspeech.modules.Decoder.__init__': ( 'a. neural modules.html#decoder.__init__',
'whisperspeech/modules.py'),
'whisperspeech.modules.Decoder.forward': ( 'a. neural modules.html#decoder.forward',
'whisperspeech/modules.py'),
'whisperspeech.modules.Encoder': ('a. neural modules.html#encoder', 'whisperspeech/modules.py'),
'whisperspeech.modules.Encoder.__init__': ( 'a. neural modules.html#encoder.__init__',
'whisperspeech/modules.py'),
'whisperspeech.modules.Encoder.forward': ( 'a. neural modules.html#encoder.forward',
'whisperspeech/modules.py'),
'whisperspeech.modules.LayerNorm': ('a. neural modules.html#layernorm', 'whisperspeech/modules.py'),
'whisperspeech.modules.LayerNorm.forward': ( 'a. neural modules.html#layernorm.forward',
'whisperspeech/modules.py'),
'whisperspeech.modules.LinearHead': ( 'a. neural modules.html#linearhead',
'whisperspeech/modules.py'),
'whisperspeech.modules.MultiHeadAttention': ( 'a. neural modules.html#multiheadattention',
'whisperspeech/modules.py'),
'whisperspeech.modules.MultiHeadAttention.__init__': ( 'a. neural '
'modules.html#multiheadattention.__init__',
'whisperspeech/modules.py'),
'whisperspeech.modules.MultiHeadAttention.forward': ( 'a. neural '
'modules.html#multiheadattention.forward',
'whisperspeech/modules.py'),
'whisperspeech.modules.MultiHeadAttention.qkv_attention_pth20': ( 'a. neural '
'modules.html#multiheadattention.qkv_attention_pth20',
'whisperspeech/modules.py'),
'whisperspeech.modules.MultiHeadAttention.qkv_attention_vanilla': ( 'a. neural '
'modules.html#multiheadattention.qkv_attention_vanilla',
'whisperspeech/modules.py'),
'whisperspeech.modules.MultiHeadAttention.qkv_attention_xformers': ( 'a. neural '
'modules.html#multiheadattention.qkv_attention_xformers',
'whisperspeech/modules.py'),
'whisperspeech.modules.QueryHead': ('a. neural modules.html#queryhead', 'whisperspeech/modules.py'),
'whisperspeech.modules.ResidualAttentionBlock': ( 'a. neural modules.html#residualattentionblock',
'whisperspeech/modules.py'),
'whisperspeech.modules.ResidualAttentionBlock.__init__': ( 'a. neural '
'modules.html#residualattentionblock.__init__',
'whisperspeech/modules.py'),
'whisperspeech.modules.ResidualAttentionBlock.forward': ( 'a. neural '
'modules.html#residualattentionblock.forward',
'whisperspeech/modules.py'),
'whisperspeech.modules.Rotary': ('a. neural modules.html#rotary', 'whisperspeech/modules.py'),
'whisperspeech.modules.Rotary.__init__': ( 'a. neural modules.html#rotary.__init__',
'whisperspeech/modules.py'),
'whisperspeech.modules.Rotary.forward': ( 'a. neural modules.html#rotary.forward',
'whisperspeech/modules.py'),
'whisperspeech.modules.SumDecoder': ( 'a. neural modules.html#sumdecoder',
'whisperspeech/modules.py'),
'whisperspeech.modules.SumDecoder.__init__': ( 'a. neural modules.html#sumdecoder.__init__',
'whisperspeech/modules.py'),
'whisperspeech.modules.SumDecoder.forward': ( 'a. neural modules.html#sumdecoder.forward',
'whisperspeech/modules.py'),
'whisperspeech.modules.apply_rotary_pos_emb': ( 'a. neural modules.html#apply_rotary_pos_emb',
'whisperspeech/modules.py'),
'whisperspeech.modules.init_transformer': ( 'a. neural modules.html#init_transformer',
'whisperspeech/modules.py'),
'whisperspeech.modules.rotate_half': ( 'a. neural modules.html#rotate_half',
'whisperspeech/modules.py'),
'whisperspeech.modules.sinusoids': ('a. neural modules.html#sinusoids', 'whisperspeech/modules.py')},
'whisperspeech.pipeline': { 'whisperspeech.pipeline.Pipeline': ('7. pipeline.html#pipeline', 'whisperspeech/pipeline.py'),
'whisperspeech.pipeline.Pipeline.__init__': ( '7. pipeline.html#pipeline.__init__',
'whisperspeech/pipeline.py'),
'whisperspeech.pipeline.Pipeline.generate': ( '7. pipeline.html#pipeline.generate',
'whisperspeech/pipeline.py'),
'whisperspeech.pipeline.Pipeline.generate_atoks': ( '7. pipeline.html#pipeline.generate_atoks',
'whisperspeech/pipeline.py'),
'whisperspeech.pipeline.Pipeline.generate_to_file': ( '7. pipeline.html#pipeline.generate_to_file',
'whisperspeech/pipeline.py'),
'whisperspeech.pipeline.Pipeline.generate_to_notebook': ( '7. '
'pipeline.html#pipeline.generate_to_notebook',
'whisperspeech/pipeline.py')},
'whisperspeech.prepare_s2a_dataset': { 'whisperspeech.prepare_s2a_dataset.flac_to_s2a_name': ( '4a. s2a dataset '
'preparation.html#flac_to_s2a_name',
'whisperspeech/prepare_s2a_dataset.py'),
'whisperspeech.prepare_s2a_dataset.prepare_s2a': ( '4a. s2a dataset '
'preparation.html#prepare_s2a',
'whisperspeech/prepare_s2a_dataset.py'),
'whisperspeech.prepare_s2a_dataset.resampler': ( '4a. s2a dataset '
'preparation.html#resampler',
'whisperspeech/prepare_s2a_dataset.py')},
'whisperspeech.prepare_t2s_dataset': { 'whisperspeech.prepare_t2s_dataset.Transcriber': ( '5a. t2s dataset '
'preparation.html#transcriber',
'whisperspeech/prepare_t2s_dataset.py'),
'whisperspeech.prepare_t2s_dataset.Transcriber.__init__': ( '5a. t2s dataset '
'preparation.html#transcriber.__init__',
'whisperspeech/prepare_t2s_dataset.py'),
'whisperspeech.prepare_t2s_dataset.Transcriber.transcribe': ( '5a. t2s dataset '
'preparation.html#transcriber.transcribe',
'whisperspeech/prepare_t2s_dataset.py'),
'whisperspeech.prepare_t2s_dataset.flac_to_t2s_name': ( '5a. t2s dataset '
'preparation.html#flac_to_t2s_name',
'whisperspeech/prepare_t2s_dataset.py'),
'whisperspeech.prepare_t2s_dataset.prepare_t2s': ( '5a. t2s dataset '
'preparation.html#prepare_t2s',
'whisperspeech/prepare_t2s_dataset.py')},
'whisperspeech.s2a_delar_mup_wds': { 'whisperspeech.s2a_delar_mup_wds.CMLMVisual': ( '4b. semantic to acoustic token '
'modeling.html#cmlmvisual',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.CMLMVisual.__init__': ( '4b. semantic to acoustic token '
'modeling.html#cmlmvisual.__init__',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.CMLMVisual.add_data': ( '4b. semantic to acoustic token '
'modeling.html#cmlmvisual.add_data',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.CMLMVisual.add_table_row': ( '4b. semantic to acoustic '
'token '
'modeling.html#cmlmvisual.add_table_row',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.CMLMVisual.hide': ( '4b. semantic to acoustic token '
'modeling.html#cmlmvisual.hide',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.CMLMVisual.on_iter': ( '4b. semantic to acoustic token '
'modeling.html#cmlmvisual.on_iter',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.CMLMVisual.plot': ( '4b. semantic to acoustic token '
'modeling.html#cmlmvisual.plot',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.CMLMVisual.show': ( '4b. semantic to acoustic token '
'modeling.html#cmlmvisual.show',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.DelSumDecoder': ( '4b. semantic to acoustic token '
'modeling.html#delsumdecoder',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.DelSumDecoder.__init__': ( '4b. semantic to acoustic '
'token '
'modeling.html#delsumdecoder.__init__',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.DelSumDecoder.forward': ( '4b. semantic to acoustic '
'token '
'modeling.html#delsumdecoder.forward',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.EmbeddingProjector': ( '4b. semantic to acoustic token '
'modeling.html#embeddingprojector',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention': ( '4b. semantic to acoustic token '
'modeling.html#multiheadattention',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.__init__': ( '4b. semantic to '
'acoustic token '
'modeling.html#multiheadattention.__init__',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.forward': ( '4b. semantic to acoustic '
'token '
'modeling.html#multiheadattention.forward',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.qkv_attention_pth20': ( '4b. semantic '
'to acoustic '
'token '
'modeling.html#multiheadattention.qkv_attention_pth20',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.MultiHeadAttention.qkv_attention_xformers': ( '4b. '
'semantic '
'to '
'acoustic '
'token '
'modeling.html#multiheadattention.qkv_attention_xformers',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock': ( '4b. semantic to acoustic '
'token '
'modeling.html#residualattentionblock',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock.__init__': ( '4b. semantic to '
'acoustic token '
'modeling.html#residualattentionblock.__init__',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.ResidualAttentionBlock.forward': ( '4b. semantic to '
'acoustic token '
'modeling.html#residualattentionblock.forward',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.Rotary': ( '4b. semantic to acoustic token '
'modeling.html#rotary',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.Rotary.__init__': ( '4b. semantic to acoustic token '
'modeling.html#rotary.__init__',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.Rotary.forward': ( '4b. semantic to acoustic token '
'modeling.html#rotary.forward',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer': ( '4b. semantic to acoustic token '
'modeling.html#sadelartransformer',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.__init__': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.__init__',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.device': ( '4b. semantic to acoustic '
'token '
'modeling.html#sadelartransformer.device',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.embed_stoks': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.embed_stoks',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.forward': ( '4b. semantic to acoustic '
'token '
'modeling.html#sadelartransformer.forward',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.generate': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.generate',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.get_extra_state': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.get_extra_state',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.get_metrics': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.get_metrics',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.init_transformer': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.init_transformer',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_checkpoint': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.load_checkpoint',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_frozen_semantic_embeddings': ( '4b. '
'semantic '
'to '
'acoustic '
'token '
'modeling.html#sadelartransformer.load_frozen_semantic_embeddings',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.load_model': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.load_model',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.save_model': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.save_model',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.set_extra_state': ( '4b. semantic to '
'acoustic token '
'modeling.html#sadelartransformer.set_extra_state',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.SADelARTransformer.setup': ( '4b. semantic to acoustic '
'token '
'modeling.html#sadelartransformer.setup',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.Tunables': ( '4b. semantic to acoustic token '
'modeling.html#tunables',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.Tunables.__post_init__': ( '4b. semantic to acoustic '
'token '
'modeling.html#tunables.__post_init__',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.Tunables.upgrade': ( '4b. semantic to acoustic token '
'modeling.html#tunables.upgrade',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds._make_model': ( '4b. semantic to acoustic token '
'modeling.html#_make_model',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.apply_rotary_pos_emb': ( '4b. semantic to acoustic token '
'modeling.html#apply_rotary_pos_emb',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.load_datasets': ( '4b. semantic to acoustic token '
'modeling.html#load_datasets',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.make_model': ( '4b. semantic to acoustic token '
'modeling.html#make_model',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.pad_samples': ( '4b. semantic to acoustic token '
'modeling.html#pad_samples',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.rand': ( '4b. semantic to acoustic token '
'modeling.html#rand',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.random_trunc': ( '4b. semantic to acoustic token '
'modeling.html#random_trunc',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.rotate_half': ( '4b. semantic to acoustic token '
'modeling.html#rotate_half',
'whisperspeech/s2a_delar_mup_wds.py'),
'whisperspeech.s2a_delar_mup_wds.speaker_id_extractor': ( '4b. semantic to acoustic token '
'modeling.html#speaker_id_extractor',
'whisperspeech/s2a_delar_mup_wds.py')},
'whisperspeech.t2s_up_wds': { 'whisperspeech.t2s_up_wds.CharTokenizer': ( '5b. text to semantic token '
'modeling.html#chartokenizer',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.CharTokenizer.decode': ( '5b. text to semantic token '
'modeling.html#chartokenizer.decode',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.CharTokenizer.encode': ( '5b. text to semantic token '
'modeling.html#chartokenizer.encode',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.Decoder': ( '5b. text to semantic token modeling.html#decoder',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.Decoder.__init__': ( '5b. text to semantic token '
'modeling.html#decoder.__init__',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.Decoder.forward': ( '5b. text to semantic token '
'modeling.html#decoder.forward',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.EmbeddingProjector': ( '5b. text to semantic token '
'modeling.html#embeddingprojector',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.Encoder': ( '5b. text to semantic token modeling.html#encoder',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.Encoder.__init__': ( '5b. text to semantic token '
'modeling.html#encoder.__init__',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.Encoder.forward': ( '5b. text to semantic token '
'modeling.html#encoder.forward',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer': ( '5b. text to semantic token '
'modeling.html#tsartransformer',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.__init__': ( '5b. text to semantic token '
'modeling.html#tsartransformer.__init__',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.device': ( '5b. text to semantic token '
'modeling.html#tsartransformer.device',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.ensure_tokenizer': ( '5b. text to semantic token '
'modeling.html#tsartransformer.ensure_tokenizer',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.forward': ( '5b. text to semantic token '
'modeling.html#tsartransformer.forward',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.generate': ( '5b. text to semantic token '
'modeling.html#tsartransformer.generate',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.generate_batch': ( '5b. text to semantic token '
'modeling.html#tsartransformer.generate_batch',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.init_transformer': ( '5b. text to semantic token '
'modeling.html#tsartransformer.init_transformer',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.load_checkpoint': ( '5b. text to semantic token '
'modeling.html#tsartransformer.load_checkpoint',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.load_frozen_semantic_embeddings': ( '5b. text to '
'semantic token '
'modeling.html#tsartransformer.load_frozen_semantic_embeddings',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.load_model': ( '5b. text to semantic token '
'modeling.html#tsartransformer.load_model',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.save_model': ( '5b. text to semantic token '
'modeling.html#tsartransformer.save_model',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.TSARTransformer.setup': ( '5b. text to semantic token '
'modeling.html#tsartransformer.setup',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.Tunables': ( '5b. text to semantic token modeling.html#tunables',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.Tunables.__post_init__': ( '5b. text to semantic token '
'modeling.html#tunables.__post_init__',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds._make_model': ( '5b. text to semantic token modeling.html#_make_model',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.ar_padder': ( '5b. text to semantic token modeling.html#ar_padder',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.build_speaker_map': ( '5b. text to semantic token '
'modeling.html#build_speaker_map',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.char_per_seconder': ( '5b. text to semantic token '
'modeling.html#char_per_seconder',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.load_datasets': ( '5b. text to semantic token '
'modeling.html#load_datasets',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.make_model': ( '5b. text to semantic token modeling.html#make_model',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.rand': ( '5b. text to semantic token modeling.html#rand',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.speaker_id_extractor': ( '5b. text to semantic token '
'modeling.html#speaker_id_extractor',
'whisperspeech/t2s_up_wds.py'),
'whisperspeech.t2s_up_wds.tokenizer': ( '5b. text to semantic token modeling.html#tokenizer',
'whisperspeech/t2s_up_wds.py')},
'whisperspeech.train': { 'whisperspeech.train.SimpleVisual': ('b1. training.html#simplevisual', 'whisperspeech/train.py'),
'whisperspeech.train.SimpleVisual.__init__': ( 'b1. training.html#simplevisual.__init__',
'whisperspeech/train.py'),
'whisperspeech.train.SimpleVisual.add_data': ( 'b1. training.html#simplevisual.add_data',
'whisperspeech/train.py'),
'whisperspeech.train.SimpleVisual.add_table_row': ( 'b1. training.html#simplevisual.add_table_row',
'whisperspeech/train.py'),
'whisperspeech.train.SimpleVisual.hide': ( 'b1. training.html#simplevisual.hide',
'whisperspeech/train.py'),
'whisperspeech.train.SimpleVisual.on_iter': ( 'b1. training.html#simplevisual.on_iter',
'whisperspeech/train.py'),
'whisperspeech.train.SimpleVisual.plot': ( 'b1. training.html#simplevisual.plot',
'whisperspeech/train.py'),
'whisperspeech.train.SimpleVisual.show': ( 'b1. training.html#simplevisual.show',
'whisperspeech/train.py'),
'whisperspeech.train.train': ('b1. training.html#train', 'whisperspeech/train.py'),
'whisperspeech.train.validate': ('b1. training.html#validate', 'whisperspeech/train.py')},
'whisperspeech.train_multi': { 'whisperspeech.train_multi.TrainingTask': ( 'b2. training (lightning).html#trainingtask',
'whisperspeech/train_multi.py'),
'whisperspeech.train_multi.TrainingTask.__init__': ( 'b2. training '
'(lightning).html#trainingtask.__init__',
'whisperspeech/train_multi.py'),
'whisperspeech.train_multi.TrainingTask.configure_optimizers': ( 'b2. training '
'(lightning).html#trainingtask.configure_optimizers',
'whisperspeech/train_multi.py'),
'whisperspeech.train_multi.TrainingTask.on_fit_start': ( 'b2. training '
'(lightning).html#trainingtask.on_fit_start',
'whisperspeech/train_multi.py'),
'whisperspeech.train_multi.TrainingTask.on_validation_epoch_end': ( 'b2. training '
'(lightning).html#trainingtask.on_validation_epoch_end',
'whisperspeech/train_multi.py'),
'whisperspeech.train_multi.TrainingTask.test_step': ( 'b2. training '
'(lightning).html#trainingtask.test_step',
'whisperspeech/train_multi.py'),
'whisperspeech.train_multi.TrainingTask.training_step': ( 'b2. training '
'(lightning).html#trainingtask.training_step',
'whisperspeech/train_multi.py'),
'whisperspeech.train_multi.TrainingTask.validation_step': ( 'b2. training '
'(lightning).html#trainingtask.validation_step',
'whisperspeech/train_multi.py'),
'whisperspeech.train_multi.parse_and_call': ( 'b2. training (lightning).html#parse_and_call',
'whisperspeech/train_multi.py')},
'whisperspeech.vad': { 'whisperspeech.vad.extract_segments': ( '1b. voice activity detection.html#extract_segments',
'whisperspeech/vad.py'),
'whisperspeech.vad.fix_dots_in_names': ( '1b. voice activity detection.html#fix_dots_in_names',
'whisperspeech/vad.py'),
'whisperspeech.vad.flac_to_vad_name': ( '1b. voice activity detection.html#flac_to_vad_name',
'whisperspeech/vad.py'),
'whisperspeech.vad.load_dataset': ( '1b. voice activity detection.html#load_dataset',
'whisperspeech/vad.py'),
'whisperspeech.vad.process_shard': ( '1b. voice activity detection.html#process_shard',
'whisperspeech/vad.py'),
'whisperspeech.vad.segment_audio': ( '1b. voice activity detection.html#segment_audio',
'whisperspeech/vad.py')},
'whisperspeech.verify_wds': { 'whisperspeech.verify_wds.process_shard': ( '0. verify webdataset archives.html#process_shard',
'whisperspeech/verify_wds.py')},
'whisperspeech.vq_stoks': { 'whisperspeech.vq_stoks.RQBottleneckTransformer': ( '2b. whisper quantization (semantic token) '
'model.html#rqbottlenecktransformer',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.__init__': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.__init__',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.decode_text': ( '2b. whisper quantization '
'(semantic token) '
'model.html#rqbottlenecktransformer.decode_text',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.dequantize': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.dequantize',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.device': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.device',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.downsample_embeddings': ( '2b. whisper '
'quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.downsample_embeddings',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.encode_audio': ( '2b. whisper quantization '
'(semantic token) '
'model.html#rqbottlenecktransformer.encode_audio',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.encode_mel': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.encode_mel',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.ensure_whisper': ( '2b. whisper quantization '
'(semantic token) '
'model.html#rqbottlenecktransformer.ensure_whisper',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.extract_teacher': ( '2b. whisper quantization '
'(semantic token) '
'model.html#rqbottlenecktransformer.extract_teacher',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.forward': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.forward',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.get_metrics': ( '2b. whisper quantization '
'(semantic token) '
'model.html#rqbottlenecktransformer.get_metrics',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.init_transformer': ( '2b. whisper quantization '
'(semantic token) '
'model.html#rqbottlenecktransformer.init_transformer',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.load_checkpoint': ( '2b. whisper quantization '
'(semantic token) '
'model.html#rqbottlenecktransformer.load_checkpoint',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.load_model': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.load_model',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.quantize': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.quantize',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.save_model': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.save_model',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.RQBottleneckTransformer.setup': ( '2b. whisper quantization (semantic '
'token) '
'model.html#rqbottlenecktransformer.setup',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.Tunables': ( '2b. whisper quantization (semantic token) '
'model.html#tunables',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.Tunables.__post_init__': ( '2b. whisper quantization (semantic token) '
'model.html#tunables.__post_init__',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.Tunables.upgrade': ( '2b. whisper quantization (semantic token) '
'model.html#tunables.upgrade',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.add_masks': ( '2b. whisper quantization (semantic token) '
'model.html#add_masks',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.derived_dataset': ( '2b. whisper quantization (semantic token) '
'model.html#derived_dataset',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.load_datasets': ( '2b. whisper quantization (semantic token) '
'model.html#load_datasets',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.logrand': ( '2b. whisper quantization (semantic token) model.html#logrand',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.make_model': ( '2b. whisper quantization (semantic token) '
'model.html#make_model',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.merge_in': ( '2b. whisper quantization (semantic token) '
'model.html#merge_in',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.rand': ( '2b. whisper quantization (semantic token) model.html#rand',
'whisperspeech/vq_stoks.py'),
'whisperspeech.vq_stoks.tokenize_text': ( '2b. whisper quantization (semantic token) '
'model.html#tokenize_text',
'whisperspeech/vq_stoks.py')},
'whisperspeech.wer_metrics': { 'whisperspeech.wer_metrics.DfBuilder': ( 'c. word error rate metrics.html#dfbuilder',
'whisperspeech/wer_metrics.py'),
'whisperspeech.wer_metrics.DfBuilder.__init__': ( 'c. word error rate '
'metrics.html#dfbuilder.__init__',
'whisperspeech/wer_metrics.py'),
'whisperspeech.wer_metrics.DfBuilder.df': ( 'c. word error rate metrics.html#dfbuilder.df',
'whisperspeech/wer_metrics.py'),
'whisperspeech.wer_metrics.DfBuilder.push': ( 'c. word error rate metrics.html#dfbuilder.push',
'whisperspeech/wer_metrics.py'),
'whisperspeech.wer_metrics.WERStats': ( 'c. word error rate metrics.html#werstats',
'whisperspeech/wer_metrics.py'),
'whisperspeech.wer_metrics.WERStats.__init__': ( 'c. word error rate '
'metrics.html#werstats.__init__',
'whisperspeech/wer_metrics.py'),
'whisperspeech.wer_metrics.WERStats.push_sample': ( 'c. word error rate '
'metrics.html#werstats.push_sample',
'whisperspeech/wer_metrics.py'),
'whisperspeech.wer_metrics.librispeech_data': ( 'c. word error rate '
'metrics.html#librispeech_data',
'whisperspeech/wer_metrics.py'),
'whisperspeech.wer_metrics.whisper_normalize': ( 'c. word error rate '
'metrics.html#whisper_normalize',
'whisperspeech/wer_metrics.py')},
'whisperspeech.wh_transcribe': { 'whisperspeech.wh_transcribe.chunk_merger': ( '2a. whisper quantization dataset '
'preparation.html#chunk_merger',
'whisperspeech/wh_transcribe.py'),
'whisperspeech.wh_transcribe.flac_to_txt_name': ( '2a. whisper quantization dataset '
'preparation.html#flac_to_txt_name',
'whisperspeech/wh_transcribe.py'),
'whisperspeech.wh_transcribe.merge_in': ( '2a. whisper quantization dataset '
'preparation.html#merge_in',
'whisperspeech/wh_transcribe.py'),
'whisperspeech.wh_transcribe.process_shard': ( '2a. whisper quantization dataset '
'preparation.html#process_shard',
'whisperspeech/wh_transcribe.py'),
'whisperspeech.wh_transcribe.random_cutter': ( '2a. whisper quantization dataset '
'preparation.html#random_cutter',
'whisperspeech/wh_transcribe.py'),
'whisperspeech.wh_transcribe.split_to_chunks': ( '2a. whisper quantization dataset '
'preparation.html#split_to_chunks',
'whisperspeech/wh_transcribe.py'),
'whisperspeech.wh_transcribe.wds_compose': ( '2a. whisper quantization dataset '
'preparation.html#wds_compose',
'whisperspeech/wh_transcribe.py')}}}
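
# ---------------------------------------------------------------------------
# Illustrative helper (NOT part of the nbdev-generated index above).
# Every entry in d['syms'] maps a fully qualified symbol name to a tuple of
# (documentation page plus '#anchor', source file inside the library).
# Together with d['settings'] (doc_host, doc_baseurl) this is enough to
# reconstruct a documentation link. The sketch below is only an assumption-
# labelled demonstration of the data layout; nbdev ships its own lookup
# tooling, so do not treat this as the project's API.
from typing import Optional
from urllib.parse import quote


def doc_url(symbol: str) -> Optional[str]:
    """Return a rendered-docs URL for a fully qualified symbol, or None if unknown."""
    settings = d['settings']
    base = settings['doc_host'].rstrip('/') + settings['doc_baseurl']
    for module_syms in d['syms'].values():
        if symbol in module_syms:
            page_anchor, _source_file = module_syms[symbol]
            page, _, anchor = page_anchor.partition('#')
            url = f"{base}/{quote(page)}"  # percent-encode spaces in the page name
            return f"{url}#{anchor}" if anchor else url
    return None

# Hypothetical usage: doc_url('whisperspeech.pipeline.Pipeline.generate')
# combines doc_host, doc_baseurl and the page/anchor recorded in the index above.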