esnya committed on
Commit
d62201b
·
1 Parent(s): 1bfcb70

speecht5_tts_jvs_ver1_e20_openjtalk_longer_20230809-031157_tokenizer

Browse files
Files changed (1) hide show
  1. speecht5_openjtalk_tokenizer.py +10 -5
speecht5_openjtalk_tokenizer.py CHANGED
@@ -9,6 +9,9 @@ from itertools import chain
9
  from typing import List, Optional
10
 
11
 
 
 
 
12
  def _g2p_with_np(text: str, np_lsit: str) -> List[str]:
13
  from pyopenjtalk import g2p
14
 
@@ -25,15 +28,11 @@ def _g2p_with_np(text: str, np_lsit: str) -> List[str]:
25
  )
26
 
27
 
28
- NP_CHARCTERS = " !\"#$%&'()=~|`{+*}<>?_-^\\@[;:],./ !”#$%&’()=~|`{+*}<>?_ー^¥@「;:」、。・`"
29
-
30
-
31
  class SpeechT5OpenjtalkTokenizer(SpeechT5Tokenizer):
32
- vocab_files_names = {"vocab_file": "spm_char.model"}
33
  pretrained_vocab_files_map = {}
34
  max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
35
  model_input_names = ["input_ids", "attention_mask"]
36
- label2id = {}
37
 
38
  def __init__(
39
  self,
@@ -58,6 +57,7 @@ class SpeechT5OpenjtalkTokenizer(SpeechT5Tokenizer):
58
  pass
59
 
60
  self.non_phenome_characters = non_phenome_characters
 
61
 
62
  if isinstance(vocab_file, str) and vocab_file.endswith(".json"):
63
  with open(vocab_file, encoding="utf-8") as f:
@@ -75,6 +75,11 @@ class SpeechT5OpenjtalkTokenizer(SpeechT5Tokenizer):
75
  def get_vocab(self):
76
  return self.label2id
77
 
 
 
 
 
 
78
  def save_vocabulary(
79
  self, save_directory: str, filename_prefix: Optional[str] = None
80
  ):
 
9
  from typing import List, Optional
10
 
11
 
12
+ NP_CHARCTERS = " !\"#$%&'()=~|`{+*}<>?_-^\\@[;:],./ !”#$%&’()=~|`{+*}<>?_ー^¥@「;:」、。・`"
13
+
14
+
15
  def _g2p_with_np(text: str, np_lsit: str) -> List[str]:
16
  from pyopenjtalk import g2p
17
 
 
28
  )
29
 
30
 
 
 
 
31
  class SpeechT5OpenjtalkTokenizer(SpeechT5Tokenizer):
32
+ vocab_files_names = {"vocab_file": "vocab.json"}
33
  pretrained_vocab_files_map = {}
34
  max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
35
  model_input_names = ["input_ids", "attention_mask"]
 
36
 
37
  def __init__(
38
  self,
 
57
  pass
58
 
59
  self.non_phenome_characters = non_phenome_characters
60
+ self.vocab_file = vocab_file
61
 
62
  if isinstance(vocab_file, str) and vocab_file.endswith(".json"):
63
  with open(vocab_file, encoding="utf-8") as f:
 
75
  def get_vocab(self):
76
  return self.label2id
77
 
78
+ def __getstate__(self):
79
+ state = super().__getstate__()
80
+ del state["sp_model"]
81
+ return state
82
+
83
  def save_vocabulary(
84
  self, save_directory: str, filename_prefix: Optional[str] = None
85
  ):