esnya committed on
Commit
a27e354
·
1 Parent(s): 49c417c

speecht5_tts_jvs_ver1_e20_openjtalk_longer_20230809-031157_tokenizer

Browse files
Files changed (1) hide show
  1. speecht5_openjtalk_tokenizer.py +1 -19
speecht5_openjtalk_tokenizer.py CHANGED
@@ -34,7 +34,6 @@ def _g2p_with_np(text: str, np_lsit: str) -> List[str]:
34
 
35
  VOCAB_FILES_NAMES = {
36
  "vocab_file": "vocab.json",
37
- "tokenizer_file": "tokenizer.json",
38
  }
39
 
40
  PRETRAINED_VOCAB_FILES_MAP = {
@@ -119,24 +118,7 @@ class SpeechT5OpenjtalkTokenizer(SpeechT5Tokenizer):
119
  with open(vocab_path, "w", encoding="utf-8") as f:
120
  json.dump(self.label2id, f, ensure_ascii=False, indent=2)
121
 
122
- # special_tokens_path = Path(save_directory) / Path(
123
- # f"special_tokens_map{filename_prefix}"
124
- # )
125
- # with open(special_tokens_path, "w", encoding="utf-8") as f:
126
- # json.dump(
127
- # {
128
- # "bos_token": self.bos_token,
129
- # "eos_token": self.eos_token,
130
- # "unk_token": self.unk_token,
131
- # "pad_token": self.pad_token,
132
- # "mask_token": self.mask_token,
133
- # },
134
- # f,
135
- # ensure_ascii=False,
136
- # indent=2,
137
- # )
138
-
139
- return str(vocab_path), None # str(special_tokens_path)
140
 
141
  def _tokenize(self, text: str) -> List[str]:
142
  return _g2p_with_np(text, self.non_phenome_characters)
 
34
 
35
  VOCAB_FILES_NAMES = {
36
  "vocab_file": "vocab.json",
 
37
  }
38
 
39
  PRETRAINED_VOCAB_FILES_MAP = {
 
118
  with open(vocab_path, "w", encoding="utf-8") as f:
119
  json.dump(self.label2id, f, ensure_ascii=False, indent=2)
120
 
121
+ return (str(vocab_path),)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  def _tokenize(self, text: str) -> List[str]:
124
  return _g2p_with_np(text, self.non_phenome_characters)