Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) 2024 Amphion. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
from phonemizer.backend import EspeakBackend | |
from phonemizer.separator import Separator | |
from phonemizer.utils import list2str, str2list | |
from typing import List, Union | |
import os | |
import json | |
import sys | |
# Separator strings handed to the backends when phonemizing: a single space
# between phones, " _ " between words and "|" between syllables.
separator = Separator(phone=" ", word=" _ ", syllable="|")

# Keyword options shared by every espeak backend created below.
_ESPEAK_OPTIONS = {
    "preserve_punctuation": False,
    "with_stress": False,
    "language_switch": "remove-flags",
}

# One backend per supported language, constructed once when the module is
# imported. The first argument is the espeak voice/language identifier.
phonemizer_zh = EspeakBackend("cmn", **_ESPEAK_OPTIONS)
phonemizer_en = EspeakBackend("en-us", **_ESPEAK_OPTIONS)
phonemizer_ja = EspeakBackend("ja", **_ESPEAK_OPTIONS)
phonemizer_ko = EspeakBackend("ko", **_ESPEAK_OPTIONS)
phonemizer_fr = EspeakBackend("fr-fr", **_ESPEAK_OPTIONS)
phonemizer_de = EspeakBackend("de", **_ESPEAK_OPTIONS)

# Lookup table from the short language code used by callers to its backend.
lang2backend = dict(
    zh=phonemizer_zh,
    ja=phonemizer_ja,
    en=phonemizer_en,
    fr=phonemizer_fr,
    ko=phonemizer_ko,
    de=phonemizer_de,
)
# Load the vocabulary that phonemizer_g2p() uses to map each phoneme string to
# its token entry. This runs once, at import time.
# NOTE(review): the path is relative to the current working directory, so the
# import presumably has to happen with the repository root as cwd -- confirm.
# The encoding is explicit so that decoding does not depend on the platform's
# locale default, which is not UTF-8 everywhere.
with open(
    "./models/tts/maskgct/g2p/utils/mls_en.json", "r", encoding="utf-8"
) as f:
    json_data = f.read()
token = json.loads(json_data)
def phonemizer_g2p(text, language):
    """Phonemize ``text`` and look up the token entry of every phoneme.

    Args:
        text: A string, or a list of strings, to phonemize.
        language: Key into ``lang2backend`` selecting the espeak backend.

    Returns:
        A ``(phonemes, token_id)`` tuple. ``phonemes`` is whatever
        ``_phonemize`` returns for the input. When it is a list,
        ``token_id`` is a list with one list of token entries per element;
        otherwise ``token_id`` is a single flat list.

    Raises:
        KeyError: If ``language`` is not a key of ``lang2backend``.
    """
    backend = lang2backend[language]
    phonemes = _phonemize(
        backend,
        text,
        separator,
        strip=True,
        njobs=1,
        prepend_text=False,
        preserve_empty_lines=False,
    )

    def lookup(phoneme_line):
        # Symbols that are absent from the vocabulary are skipped silently.
        return [token[p] for p in phoneme_line.split(" ") if p in token]

    if isinstance(phonemes, list):
        token_id = [lookup(line) for line in phonemes]
    else:
        token_id = lookup(phonemes)
    return phonemes, token_id
def _phonemize(  # pylint: disable=too-many-arguments
    backend,
    text: Union[str, List[str]],
    separator: Separator,
    strip: bool,
    njobs: int,
    prepend_text: bool,
    preserve_empty_lines: bool,
):
    """Auxiliary function to phonemize()

    Does the phonemization and returns the phonemized text. Errors raised by
    ``backend.phonemize`` propagate to the caller (a RuntimeError on error,
    per the original note on this function).

    Args:
        backend: Object exposing ``phonemize(text, separator=, strip=, njobs=)``.
        text: A single string or a list of lines to phonemize.
        separator: Forwarded unchanged to ``backend.phonemize``.
        strip: Forwarded unchanged to ``backend.phonemize``.
        njobs: Forwarded unchanged to ``backend.phonemize``.
        prepend_text: When true, return ``(input_line, phonemized_line)``
            pairs instead of the phonemized lines alone.
        preserve_empty_lines: When true, blank input lines are re-inserted
            as empty strings at their original positions in the output.

    Returns:
        A list of ``(text, phonemized)`` tuples if ``prepend_text`` is set;
        otherwise ``list2str`` of the phonemized lines when ``text`` was a
        string, or the list of phonemized lines when it was a list.
    """
    # Remember the input form for the output (either list or string).
    # isinstance() is used rather than an exact type comparison so that a
    # str subclass is also answered with a string, not a list.
    return_as_str = isinstance(text, str)

    # force the text as a list
    text = [line.strip(os.linesep) for line in str2list(text)]

    # if preserving empty lines, note the index of each empty line; the list
    # stays empty otherwise so the re-insertion loop below is a no-op
    empty_lines = []
    if preserve_empty_lines:
        empty_lines = [n for n, line in enumerate(text) if not line.strip()]

    # ignore empty lines
    text = [line for line in text if line.strip()]

    if text:
        # phonemize the text
        phonemized = backend.phonemize(
            text, separator=separator, strip=strip, njobs=njobs
        )
    else:
        # the backend is not called when there is nothing to phonemize
        phonemized = []

    # reinsert the empty lines; `text` only needs them when it is going to be
    # zipped with the phonemized lines below
    for i in empty_lines:
        if prepend_text:
            text.insert(i, "")
        phonemized.insert(i, "")

    # at that point, the phonemized text is a list of str. Format it as
    # expected by the parameters
    if prepend_text:
        return list(zip(text, phonemized))
    if return_as_str:
        return list2str(phonemized)
    return phonemized