nssharmaofficial committed on
Commit
92562f2
1 Parent(s): 4169569

Fix vocab dict

Browse files
Files changed (1) hide show
  1. source/vocab.py +9 -5
source/vocab.py CHANGED
@@ -78,11 +78,15 @@ class Vocab:
78
 
79
  self.size = len(self.word2index)
80
 
81
- def _add_predefined_tokens(self):
82
- predefined_tokens = ['<pad>', '<sos>', '<eos>', '<unk>']
83
- for index, token in enumerate(predefined_tokens):
84
- self.word2index[token] = index
85
- self.index2word[index] = token
 
 
 
 
86
 
87
  def word_to_index(self, word: str) -> int:
88
  """ Map word to index from word2index dictionary in vocabulary
 
78
 
79
  self.size = len(self.word2index)
80
 
81
+ # adding predefined tokens in the vocabulary
82
+ self.index2word[self.PADDING_INDEX] = '<pad>'
83
+ self.word2index['<pad>'] = self.PADDING_INDEX
84
+ self.index2word[self.SOS] = '<sos>'
85
+ self.word2index['<sos>'] = self.SOS
86
+ self.index2word[self.EOS] = '<eos>'
87
+ self.word2index['<eos>'] = self.EOS
88
+ self.index2word[self.UNKNOWN_WORD_INDEX] = '<unk>'
89
+ self.word2index['<unk>'] = self.UNKNOWN_WORD_INDEX
90
 
91
  def word_to_index(self, word: str) -> int:
92
  """ Map word to index from word2index dictionary in vocabulary