"""Spell out numbers written with Myanmar digits as Myanmar words.

This file is adapted from https://github.com/hpbyte/Myanmar_Number_to_Words

NOTE(review): the previous revision of this file was damaged.  Its Myanmar
text had apparently been decoded with a Thai single-byte codec, several
string literals were split across two lines, and every line carried a
trailing " |", so the module could not even be parsed.  The literals below
were rebuilt from that text.  Vowel and tone signs and all ASCII characters
were still identifiable; digits and base consonants were not, so they were
chosen from the standard spelling of each word.  Please diff against the
upstream project before relying on them.  Literals are written as Unicode
escapes, with the rendered text in a comment, so that the exact code points
are explicit.
"""

import re

# Spoken form of each Myanmar digit, keyed by the digit character
# (U+1040 .. U+1049).  The ASCII ":" that ends some words is kept exactly as
# it appeared in the file; presumably it stands in for the visarga sign
# (U+1038) -- TODO confirm.
mm_digit = {
    "\u1040": "\u101e\u102f\u100a",  # ၀ -> သုည
    "\u1041": "\u1010\u1005\u103a",  # ၁ -> တစ်
    "\u1042": "\u1014\u103e\u1005\u103a",  # ၂ -> နှစ်
    "\u1043": "\u101e\u102f\u1036:",  # ၃ -> သုံ:
    "\u1044": "\u101c\u1031:",  # ၄ -> လေ:
    "\u1045": "\u1004\u102b:",  # ၅ -> ငါ:
    "\u1046": "\u1001\u103c\u1031\u102c\u1000\u103a",  # ၆ -> ခြောက်
    "\u1047": "\u1001\u102f\u1014\u103e\u1005\u103a",  # ၇ -> ခုနှစ်
    "\u1048": "\u101b\u103e\u1005\u103a",  # ၈ -> ရှစ်
    "\u1049": "\u1000\u102d\u102f:",  # ၉ -> ကို:
}

# Patterns used both to classify a number (mm_num2word) and to find numbers
# inside running text (extract_num).  Backslashes are doubled so the pattern
# text is the same as before without relying on invalid string escapes.
rgxPh = "^(\u1040\u1041|\u1040\u1049)"  # leading 01 or 09
rgxDate = (
    "[\u1040-\u1049]{1,2}-[\u1040-\u1049]{1,2}-[\u1040-\u1049]{4}"
    "|[\u1040-\u1049]{1,2}\\/[\u1040-\u1049]{1,2}\\/[\u1040-\u1049]{4}"
)
rgxTime = "[\u1040-\u1049]{1,2}:[\u1040-\u1049]{1,2}"
rgxDec = "[\u1040-\u1049]*\\.[\u1040-\u1049]*"
rgxAmt = "[,\u1040-\u1049]+"

# Digits that get special treatment.
_ZERO = "\u1040"  # ၀
_ONE = "\u1041"  # ၁
_SEVEN = "\u1047"  # ၇
_THIRTY = "\u1043\u1040"  # ၃၀
_NONZERO_DIGIT = "[\u1041-\u1049]"  # ၁-၉

# Place-value words; each carries its trailing space.  The second form of a
# pair has an extra dot-below sign (U+1037) appended after the asat.
_LAKH = "\u101e\u102d\u1014\u103a: "  # သိန်:   (hundred thousand)
_TEN_THOUSAND = "\u101e\u1031\u102c\u1004\u103a: "  # သောင်:
_THOUSAND = "\u1011\u1031\u102c\u1004\u103a "  # ထောင်
_THOUSAND_LINKED = "\u1011\u1031\u102c\u1004\u103a\u1037 "  # ထောင့်
_HUNDRED = "\u101b\u102c "  # ရာ
_HUNDRED_LINKED = "\u101b\u102c\u1037 "  # ရာ့
_TEN = "\u1006\u101a\u103a "  # ဆယ်
_TEN_LINKED = "\u1006\u101a\u103a\u1037 "  # ဆယ့်

# Words used by the phone / date / time / decimal readings.
_SEVEN_IN_PHONE = "\u1001\u103d\u1014\u103a"  # ခွန်
_YEAR = " \u1001\u102f\u1014\u103e\u1005\u103a "  # ခုနှစ်
_MONTH = " \u101c\u1015\u102d\u102f\u1004\u103a: "  # လပိုင်:
_DAY = " \u101b\u1000\u103a"  # ရက်
_HOUR = " \u1014\u102c\u101b\u102e "  # နာရီ
_HALF = "\u1001\u103d\u1032"  # ခွဲ
_MINUTE = " \u1019\u102d\u1014\u1005\u103a"  # မိနစ်
_POINT = " \u1012\u103f\u1019 "  # ဒဿမ


def convert_digit(num):
    """
    Spell out a plain run of Myanmar digits.

    Places are emitted from the most significant down.  Everything above the
    ten-thousands place is converted through mm_num2word() and combined with
    the hundred-thousand word; the ten-thousands, thousands, hundreds, tens
    and ones places follow.  A place whose digit is zero adds nothing.

    @type   num     str
    @param  num     Myanmar digits only (separators already removed)
    @rtype          str
    @return         converted Myanmar spoken words; place words keep their
                    trailing space
    """
    nb_digits = len(num)

    def is_nonzero(pos):
        # pos counts from the right; 1 is the ones place.
        return num[-pos] != _ZERO

    words = []

    if nb_digits > 5:
        lakhs = num[:-5]
        lakh_words = mm_num2word(lakhs)
        # When the count ends in two zeros the place word goes first.
        if lakhs[-2:] == _ZERO * 2:
            words.append(_LAKH + lakh_words)
        else:
            words.append(lakh_words + _LAKH)

    if nb_digits > 4 and is_nonzero(5):
        words.append(mm_digit[num[-5]] + _TEN_THOUSAND)

    if nb_digits > 3 and is_nonzero(4):
        # The plain form is used only when nothing follows the thousands.
        round_thousand = num[-3:] == _ZERO * 3
        words.append(
            mm_digit[num[-4]] + (_THOUSAND if round_thousand else _THOUSAND_LINKED)
        )

    if nb_digits > 2 and is_nonzero(3):
        tens_digit, ones_digit = num[-2], num[-1]
        # The linked form is used when exactly one of the two lower digits
        # is non-zero.
        linked = (
            tens_digit == _ZERO and re.match(_NONZERO_DIGIT, ones_digit)
        ) or (re.match(_NONZERO_DIGIT, tens_digit) and ones_digit == _ZERO)
        words.append(mm_digit[num[-3]] + (_HUNDRED_LINKED if linked else _HUNDRED))

    if nb_digits > 1 and is_nonzero(2):
        # A leading "one" is not spoken in front of the tens word.
        prefix = "" if num[-2] == _ONE else mm_digit[num[-2]]
        words.append(prefix + (_TEN if num[-1] == _ZERO else _TEN_LINKED))

    if nb_digits > 0 and is_nonzero(1):
        words.append(mm_digit[num[-1]])

    return "".join(words)


def mm_num2word(num):
    """
    Detect type of number and convert accordingly

    The first matching reading wins, tried in this order: phone number
    (digits read one by one), date (day-month-year or day/month/year),
    time (hours:minutes), decimal, plain amount (commas ignored).

    @type   num     str
    @param  num     Myanmar number
    @rtype          str
    @return         converted Myanmar spoken words
    @raise  ValueError  if num matches none of the supported formats
    """
    if re.match(rgxPh, num[:2]):
        # Seven has its own word when digits are read out one by one.
        return " ".join(
            _SEVEN_IN_PHONE if d == _SEVEN else mm_digit[d] for d in num
        )

    if re.match(rgxDate, num):
        fields = re.split(r"-|/", num)
        # Spoken order is year, month, day.
        return (
            convert_digit(fields[-1])
            + _YEAR
            + convert_digit(fields[1])
            + _MONTH
            + convert_digit(fields[0])
            + _DAY
        )

    if re.match(rgxTime, num):
        fields = num.split(":")
        # Thirty minutes past is read with the word for "half".
        if fields[1] == _THIRTY:
            minutes = _HALF
        else:
            minutes = convert_digit(fields[1]) + _MINUTE
        return convert_digit(fields[0]) + _HOUR + minutes

    if re.match(rgxDec, num):
        fields = num.split(".")
        # Digits after the point are read one by one.
        return (
            convert_digit(fields[0])
            + _POINT
            + " ".join(mm_digit[d] for d in fields[1])
        )

    if re.match(rgxAmt, num):
        return convert_digit(num.replace(",", ""))

    # ValueError is a subclass of Exception, so existing handlers still work.
    raise ValueError("Cannot convert the provided number format!")


def extract_num(S):
    """
    Extract numbers from the input string

    Dates, times, decimals and plain amounts are recognised, in that order
    of preference.  Matches that contain no Myanmar digit at all (a lone
    "." or ",", which the decimal and amount patterns also accept) are
    dropped, since they are punctuation rather than numbers.

    @type   S       str
    @param  S       Myanmar sentence
    @rtype          list
    @return         a list of Myanmar numbers
    """
    pattern = "%s|%s|%s|%s" % (rgxDate, rgxTime, rgxDec, rgxAmt)
    return [
        found
        for found in re.findall(pattern, S)
        if any(ch in mm_digit for ch in found)
    ]