Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,879 Bytes
b96e750 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from phonemizer.backend import EspeakBackend
from phonemizer.separator import Separator
from phonemizer.utils import list2str, str2list
from typing import List, Union
import os
import json
import sys
# Delimiters used in the phonemized output: a single space between phones,
# "|" between syllables and " _ " between words. The separator is handed to
# the backend on every phonemize call rather than stored on the backends.
separator = Separator(phone=" ", syllable="|", word=" _ ")

# Keyword options shared by every espeak backend created below.
_ESPEAK_OPTIONS = {
    "preserve_punctuation": False,
    "with_stress": False,
    "language_switch": "remove-flags",
}

# One espeak backend per supported language, built once at import time.
phonemizer_zh = EspeakBackend("cmn", **_ESPEAK_OPTIONS)
phonemizer_en = EspeakBackend("en-us", **_ESPEAK_OPTIONS)
phonemizer_ja = EspeakBackend("ja", **_ESPEAK_OPTIONS)
phonemizer_ko = EspeakBackend("ko", **_ESPEAK_OPTIONS)
phonemizer_fr = EspeakBackend("fr-fr", **_ESPEAK_OPTIONS)
phonemizer_de = EspeakBackend("de", **_ESPEAK_OPTIONS)

# Short language code -> backend instance, as looked up by phonemizer_g2p.
lang2backend = dict(
    zh=phonemizer_zh,
    ja=phonemizer_ja,
    en=phonemizer_en,
    fr=phonemizer_fr,
    ko=phonemizer_ko,
    de=phonemizer_de,
)
# Load the table that maps phoneme symbols to the values phonemizer_g2p
# collects as token ids. The encoding is pinned to UTF-8 so the file is
# decoded the same way on every platform instead of depending on the
# locale's default encoding.
# NOTE: the path is relative to the current working directory, not to this
# module's location.
with open("./diffrhythm/g2p/utils/mls_en.json", "r", encoding="utf-8") as f:
    json_data = f.read()
token = json.loads(json_data)
def phonemizer_g2p(text, language):
    """Phonemize ``text`` and look up a token id for each known phoneme.

    Args:
        text: A single string or a list of strings to phonemize.
        language: Key into ``lang2backend`` selecting the backend to use.
            An unknown key raises ``KeyError``.

    Returns:
        A ``(phonemes, token_id)`` tuple. When the phonemized result is a
        list, ``token_id`` is a list of id lists (one per entry); otherwise
        it is a flat list of ids for the single phoneme string. Symbols that
        are not present in ``token`` are skipped.
    """

    def _ids_for(phone_string):
        # Split on single spaces and keep only symbols present in the table.
        return [token[sym] for sym in phone_string.split(" ") if sym in token]

    backend = lang2backend[language]
    phonemes = _phonemize(
        backend,
        text,
        separator,
        strip=True,
        njobs=1,
        prepend_text=False,
        preserve_empty_lines=False,
    )

    if isinstance(phonemes, list):
        token_id = [_ids_for(entry) for entry in phonemes]
    else:
        token_id = _ids_for(phonemes)
    return phonemes, token_id
def _phonemize(  # pylint: disable=too-many-arguments
    backend,
    text: Union[str, List[str]],
    separator: Separator,
    strip: bool,
    njobs: int,
    prepend_text: bool,
    preserve_empty_lines: bool,
):
    """Phonemize ``text`` with ``backend`` and shape the result like the input.

    Args:
        backend: Object whose ``phonemize(lines, separator=..., strip=...,
            njobs=...)`` method performs the actual phonemization.
        text: A single string or a list of strings.
        separator: Passed through to ``backend.phonemize``.
        strip: Passed through to ``backend.phonemize``.
        njobs: Passed through to ``backend.phonemize``.
        prepend_text: When true, return ``(input_line, phonemized_line)``
            pairs instead of the phonemized lines alone.
        preserve_empty_lines: When true, blank input lines are re-inserted
            as empty strings at their original positions; otherwise they
            are dropped.

    Returns:
        A list of ``(line, phonemized)`` tuples if ``prepend_text`` is true;
        otherwise the phonemized lines joined by ``list2str`` when ``text``
        was a string, or the list of phonemized lines when it was not.

    Any exception raised by ``backend.phonemize`` propagates to the caller.
    """
    # Remember the input kind before it is converted to a list. isinstance
    # (rather than an exact type comparison) makes str subclasses come back
    # as a string too.
    return_as_str = isinstance(text, str)

    # Work on a list of lines with line-separator characters trimmed.
    lines = [line.strip(os.linesep) for line in str2list(text)]

    # Positions of blank lines; only tracked when they must be restored, and
    # always bound so the re-insertion loop below needs no extra guard.
    if preserve_empty_lines:
        empty_lines = [
            index for index, line in enumerate(lines) if not line.strip()
        ]
    else:
        empty_lines = []

    # Blank lines are never sent to the backend.
    lines = [line for line in lines if line.strip()]

    if lines:
        phonemized = backend.phonemize(
            lines, separator=separator, strip=strip, njobs=njobs
        )
    else:
        phonemized = []

    # Put the blank lines back, in ascending index order so every insertion
    # lands at its original position. The input lines only need them when
    # they are going to be paired with the output.
    for index in empty_lines:
        if prepend_text:
            lines.insert(index, "")
        phonemized.insert(index, "")

    # At this point ``phonemized`` is a list of str; format it as requested.
    if prepend_text:
        return list(zip(lines, phonemized))
    if return_as_str:
        return list2str(phonemized)
    return phonemized
|