Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -58,18 +58,14 @@ def parens_to_angles(s):
|
|
58 |
def split_num(num):
|
59 |
num = num.group()
|
60 |
if '.' in num:
|
61 |
-
|
62 |
-
a, b = num.split('.')
|
63 |
-
return ' point '.join([a, ' '.join(b)])
|
64 |
elif ':' in num:
|
65 |
-
# Time
|
66 |
h, m = [int(n) for n in num.split(':')]
|
67 |
if m == 0:
|
68 |
return f"{h} o'clock"
|
69 |
elif m < 10:
|
70 |
return f'{h} oh {m}'
|
71 |
return f'{h} {m}'
|
72 |
-
# Year
|
73 |
year = int(num[:4])
|
74 |
if year < 1100 or year % 1000 < 10:
|
75 |
return num
|
@@ -82,6 +78,24 @@ def split_num(num):
|
|
82 |
return f'{left} oh {right}{s}'
|
83 |
return f'{left} {right}{s}'
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
def normalize(text):
|
86 |
# TODO: Custom text normalization rules?
|
87 |
text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text)
|
@@ -97,6 +111,8 @@ def normalize(text):
|
|
97 |
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
98 |
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
99 |
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
|
|
|
|
100 |
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
|
101 |
text = re.sub(r'(?<=\d)S', ' S', text)
|
102 |
text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
|
|
|
58 |
def split_num(num):
|
59 |
num = num.group()
|
60 |
if '.' in num:
|
61 |
+
return num
|
|
|
|
|
62 |
elif ':' in num:
|
|
|
63 |
h, m = [int(n) for n in num.split(':')]
|
64 |
if m == 0:
|
65 |
return f"{h} o'clock"
|
66 |
elif m < 10:
|
67 |
return f'{h} oh {m}'
|
68 |
return f'{h} {m}'
|
|
|
69 |
year = int(num[:4])
|
70 |
if year < 1100 or year % 1000 < 10:
|
71 |
return num
|
|
|
78 |
return f'{left} oh {right}{s}'
|
79 |
return f'{left} {right}{s}'
|
80 |
|
81 |
+
def flip_money(m):
    """Rewrite a matched currency amount so the unit follows the number.

    Args:
        m: Regex match object whose text starts with a currency symbol
            ('$' selects dollars; any other symbol is read as pounds),
            followed by digits, an optional decimal part and, optionally,
            trailing scale words.

    Returns:
        The amount with the currency name moved after the number, e.g.
        '$1' -> '1 dollar', '$2.5' -> '2 dollars and 50 cents',
        '$3 million' -> '3 million dollars'.
    """
    m = m.group()
    symbol, amount = m[0], m[1:]
    bill = 'dollar' if symbol == '$' else 'pound'
    if m[-1].isalpha():
        # Ends in a word (a scale such as "million"): always plural, and the
        # number itself is left as-is.
        return f'{amount} {bill}s'
    if '.' not in m:
        s = '' if amount == '1' else 's'
        return f'{amount} {bill}{s}'
    b, c = amount.split('.')
    if len(c) > 2:
        # More than two decimals cannot be expressed as whole cents/pence
        # ("$3.459" must not become "459 cents"); keep the decimal number
        # intact and just append the unit.
        return f'{amount} {bill}s'
    s = '' if b == '1' else 's'
    # A single decimal digit means tenths: "2.5" is 50 cents, not 5.
    c = int(c.ljust(2, '0'))
    if symbol == '$':
        coins = 'cent' if c == 1 else 'cents'
    else:
        coins = 'penny' if c == 1 else 'pence'
    return f'{b} {bill}{s} and {c} {coins}'
|
94 |
+
|
95 |
+
def point_num(num):
    """Spell out a matched decimal number as '<integer> point <d> <d> ...'.

    Args:
        num: Regex match object whose text contains exactly one '.'.

    Returns:
        The text before the dot, the word 'point', then the digits after the
        dot separated by single spaces (e.g. '3.14' -> '3 point 1 4').
    """
    integer_part, fraction = num.group().split('.')
    # Joining a string with ' ' puts a space between each of its characters,
    # so the fractional digits are read out one at a time.
    spoken_fraction = ' '.join(fraction)
    return f'{integer_part} point {spoken_fraction}'
|
98 |
+
|
99 |
def normalize(text):
|
100 |
# TODO: Custom text normalization rules?
|
101 |
text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text)
|
|
|
111 |
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
112 |
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
113 |
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
114 |
+
text = re.sub(r'[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
|
115 |
+
text = re.sub(r'\d*\.\d+', point_num, text)
|
116 |
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
|
117 |
text = re.sub(r'(?<=\d)S', ' S', text)
|
118 |
text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
|