hexgrad committed on
Commit
44aaf39
·
verified ·
1 Parent(s): 4bfb983

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -58,18 +58,14 @@ def parens_to_angles(s):
58
  def split_num(num):
59
  num = num.group()
60
  if '.' in num:
61
- # Decimal
62
- a, b = num.split('.')
63
- return ' point '.join([a, ' '.join(b)])
64
  elif ':' in num:
65
- # Time
66
  h, m = [int(n) for n in num.split(':')]
67
  if m == 0:
68
  return f"{h} o'clock"
69
  elif m < 10:
70
  return f'{h} oh {m}'
71
  return f'{h} {m}'
72
- # Year
73
  year = int(num[:4])
74
  if year < 1100 or year % 1000 < 10:
75
  return num
@@ -82,6 +78,24 @@ def split_num(num):
82
  return f'{left} oh {right}{s}'
83
  return f'{left} {right}{s}'
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  def normalize(text):
86
  # TODO: Custom text normalization rules?
87
  text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text)
@@ -97,6 +111,8 @@ def normalize(text):
97
  text = re.sub(r'(?<=\n) +(?=\n)', '', text)
98
  text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
99
  text = re.sub(r'(?<=\d),(?=\d)', '', text)
 
 
100
  text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
101
  text = re.sub(r'(?<=\d)S', ' S', text)
102
  text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
 
58
  def split_num(num):
59
  num = num.group()
60
  if '.' in num:
61
+ return num
 
 
62
  elif ':' in num:
 
63
  h, m = [int(n) for n in num.split(':')]
64
  if m == 0:
65
  return f"{h} o'clock"
66
  elif m < 10:
67
  return f'{h} oh {m}'
68
  return f'{h} {m}'
 
69
  year = int(num[:4])
70
  if year < 1100 or year % 1000 < 10:
71
  return num
 
78
  return f'{left} oh {right}{s}'
79
  return f'{left} {right}{s}'
80
 
81
def flip_money(m):
    """Rewrite a matched money amount so the currency word follows the number.

    Intended as the replacement callback for ``re.sub``; ``m`` is a match
    object whose text starts with ``$`` or ``£`` followed by digits, an
    optional decimal fraction, and optionally trailing scale words
    (e.g. ``" million"``).

    Returns the spoken-order form, for example:
        ``$5 million`` -> ``5 million dollars``
        ``£1``         -> ``1 pound``
        ``$2.5``       -> ``2 dollars and 50 cents``
        ``$1.234``     -> ``1.234 dollars``
    """
    text = m.group()
    is_dollar = text[0] == '$'
    bill = 'dollar' if is_dollar else 'pound'
    amount = text[1:]

    # A trailing letter means the amount ends in a scale word ("million"),
    # which always takes the plural currency name.
    if text[-1].isalpha():
        return f'{amount} {bill}s'

    whole, dot, fraction = amount.partition('.')
    if not dot:
        plural = '' if whole == '1' else 's'
        return f'{whole} {bill}{plural}'

    # More than two fractional digits cannot be expressed as whole cents or
    # pence (e.g. "$1.234" is not "234 cents"), so keep the number intact and
    # read it as a decimal quantity of the currency unit.
    if len(fraction) > 2:
        return f'{amount} {bill}s'

    plural = '' if whole == '1' else 's'
    # A single fractional digit denotes tenths: "$2.5" is fifty cents.
    minor = int(fraction.ljust(2, '0'))
    if is_dollar:
        coins = 'cent' if minor == 1 else 'cents'
    else:
        coins = 'penny' if minor == 1 else 'pence'
    return f'{whole} {bill}{plural} and {minor} {coins}'
94
+
95
def point_num(num):
    """Spell a matched decimal as '<integer part> point <d1> <d2> ...'.

    ``num`` is a match object whose text contains exactly one ``.``; the
    fractional digits are read out one at a time, separated by spaces.
    """
    whole, fraction = num.group().split('.')
    spoken_digits = ' '.join(fraction)
    return f'{whole} point {spoken_digits}'
98
+
99
  def normalize(text):
100
  # TODO: Custom text normalization rules?
101
  text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text)
 
111
  text = re.sub(r'(?<=\n) +(?=\n)', '', text)
112
  text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
113
  text = re.sub(r'(?<=\d),(?=\d)', '', text)
114
+ text = re.sub(r'[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
115
+ text = re.sub(r'\d*\.\d+', point_num, text)
116
  text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
117
  text = re.sub(r'(?<=\d)S', ' S', text)
118
  text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)