Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -55,10 +55,7 @@ def clean_and_standardize_text(text):
|
|
55 |
text = re.sub(r'\(\s*', '(', text)
|
56 |
text = re.sub(r'\s*\)', ')', text)
|
57 |
|
58 |
-
# 11.
|
59 |
-
text = re.sub(r'\b(\d+)\b', '', text)
|
60 |
-
|
61 |
-
# 12. Improve spacing around punctuation
|
62 |
while ' .' in text:
|
63 |
text = text.replace(' .', '.')
|
64 |
|
@@ -72,7 +69,7 @@ def clean_and_standardize_text(text):
|
|
72 |
text = text.replace('- -', '-')
|
73 |
text = text.replace('. -', '.')
|
74 |
|
75 |
-
#
|
76 |
text = re.sub(r'([.,]){2,}', r'\1', text)
|
77 |
text = re.sub(r'(?<=[:.])[:.]+', '', text)
|
78 |
|
|
|
55 |
text = re.sub(r'\(\s*', '(', text)
|
56 |
text = re.sub(r'\s*\)', ')', text)
|
57 |
|
58 |
+
# 11. Improve spacing around punctuation
|
|
|
|
|
|
|
59 |
while ' .' in text:
|
60 |
text = text.replace(' .', '.')
|
61 |
|
|
|
69 |
text = text.replace('- -', '-')
|
70 |
text = text.replace('. -', '.')
|
71 |
|
72 |
+
# 12. Detect two punctuation marks in a row, keeping the last
|
73 |
text = re.sub(r'([.,]){2,}', r'\1', text)
|
74 |
text = re.sub(r'(?<=[:.])[:.]+', '', text)
|
75 |
|