hexgrad committed on
Commit
a7e7cf3
·
verified ·
1 Parent(s): 3f2318e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -30,10 +30,12 @@ def parens_to_angles(s):
30
 
31
  def normalize(text):
32
  # TODO: Custom text normalization rules?
33
- text = text.replace('Dr.', 'Doctor')
34
- text = text.replace('Mr.', 'Mister')
35
- text = text.replace('Ms.', 'Miss')
36
- text = text.replace('Mrs.', 'Mrs')
 
 
37
  text = text.replace(chr(8216), "'").replace(chr(8217), "'")
38
  text = text.replace(chr(8220), '"').replace(chr(8221), '"')
39
  text = re.sub(r'[^\S \n]', ' ', text)
 
30
 
31
  def normalize(text):
32
  # TODO: Custom text normalization rules?
33
+ text = re.sub(r'D[Rr]\.(?= [A-Z])', 'Doctor', text)
34
+ text = re.sub(r'Mr\.|MR\.(?= [A-Z])', 'Mister', text)
35
+ text = re.sub(r'Ms\.|MS\.(?= [A-Z])', 'Miss', text)
36
+ text = re.sub(r'Mrs\.|MRS\.(?= [A-Z])', 'Mrs', text)
37
+ text = re.sub(r'etc\.(?! [A-Z])', 'etc', text)
38
+ text = re.sub(r'\b([Yy])eah\b', r"\1e'a", text)
39
  text = text.replace(chr(8216), "'").replace(chr(8217), "'")
40
  text = text.replace(chr(8220), '"').replace(chr(8221), '"')
41
  text = re.sub(r'[^\S \n]', ' ', text)