cnmoro committed on
Commit
d1dba9f
·
verified ·
1 Parent(s): 3c78773

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -5
app.py CHANGED
@@ -55,10 +55,7 @@ def clean_and_standardize_text(text):
55
  text = re.sub(r'\(\s*', '(', text)
56
  text = re.sub(r'\s*\)', ')', text)
57
 
58
- # 11. Remove extra numbers without meaning
59
- text = re.sub(r'\b(\d+)\b', '', text)
60
-
61
- # 12. Improve spacing around punctuations
62
  while ' .' in text:
63
  text = text.replace(' .', '.')
64
 
@@ -72,7 +69,7 @@ def clean_and_standardize_text(text):
72
  text = text.replace('- -', '-')
73
  text = text.replace('. -', '.')
74
 
75
- # 13. Detect two punctuation marks in a row, keeping the last
76
  text = re.sub(r'([.,]){2,}', r'\1', text)
77
  text = re.sub(r'(?<=[:.])[:.]+', '', text)
78
 
 
55
  text = re.sub(r'\(\s*', '(', text)
56
  text = re.sub(r'\s*\)', ')', text)
57
 
58
+ # 11. Improve spacing around punctuations
 
 
 
59
  while ' .' in text:
60
  text = text.replace(' .', '.')
61
 
 
69
  text = text.replace('- -', '-')
70
  text = text.replace('. -', '.')
71
 
72
+ # 12. Detect two punctuation marks in a row, keeping the last
73
  text = re.sub(r'([.,]){2,}', r'\1', text)
74
  text = re.sub(r'(?<=[:.])[:.]+', '', text)
75