Spaces:
Runtime error
Runtime error
File size: 297 Bytes
0379fdb |
1 2 3 4 5 6 7 8 9 10 |
import re
def clean_text(text: str) -> str:
    """Return *text* with parentheses split out as standalone tokens.

    The input is broken into tokens, where a token is either a single
    ``(`` / ``)`` character or a maximal run of characters that are neither
    whitespace nor parentheses. The tokens are then joined with single
    spaces, so any run of whitespace collapses to one space and leading or
    trailing whitespace is dropped.

    >>> clean_text("foo(bar)  baz")
    'foo ( bar ) baz'
    >>> clean_text("")
    ''
    """
    # Ensure parentheses are properly separated (the original author's note
    # says this is for the spaCy tokenizer on the CNN/DailyMail dataset).
    split_punct = re.escape(r'()')
    # First alternative: a word containing no whitespace or split characters.
    # Second alternative: exactly one split character on its own.
    tokens = re.findall(rf"[^\s{split_punct}]+|[{split_punct}]", text)
    return ' '.join(tokens)
|