gpt2-home / normalizer.py
HamidRezaAttar's picture
add normalizer
b384e43
raw
history blame
503 Bytes
class Normalizer:
    """Post-process text by dropping repeated sentences and a dangling tail.

    A "sentence" here is simply a run of characters terminated by ``"."``;
    no other punctuation is treated as a sentence boundary. The class holds
    no state.
    """

    def __init__(self):
        """Create a normalizer; there is nothing to configure."""

    def remove_repetitions(self, text: str) -> str:
        """Return *text* with repeated sentences removed, keeping the first.

        Sentences are compared after stripping surrounding whitespace, so
        ``"A. B. A."`` becomes ``"A. B."``. Each kept sentence retains its
        original spacing and its terminating period, and text that contains
        no repeated sentence is returned unchanged.
        """
        # Everything before the final "." is a terminated sentence; whatever
        # follows it is an unterminated tail (possibly empty).
        *sentences, tail = text.split(".")

        seen = set()
        kept = []
        for sentence in sentences:
            key = sentence.strip()
            # Blank segments come from runs of periods ("..."); they are
            # punctuation rather than sentences, so they are never dropped.
            if key:
                if key in seen:
                    continue
                seen.add(key)
            # Re-attach the period here so that dropping a later duplicate
            # cannot take a kept sentence's terminator with it.
            kept.append(sentence + ".")

        # The tail is dropped only if it repeats a sentence already kept.
        if tail.strip() not in seen:
            kept.append(tail)
        return "".join(kept)

    def trim_last_sentence(self, text: str) -> str:
        """Return *text* cut off just after its last period.

        Anything after the final ``"."`` is discarded. If *text* contains no
        period at all, the result is the empty string.
        """
        head, period, _ = text.rpartition(".")
        return head + period

    def clean_txt(self, text: str) -> str:
        """Remove repeated sentences, then trim the unterminated tail."""
        return self.trim_last_sentence(self.remove_repetitions(text))