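# Spaces: Runtime error
# NOTE(review): the line above appears to be a hosting-page status banner that
# was captured together with the source (it occurred twice); it is not part of
# the program.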
from collections import deque

from pythainlp.util import text_to_num, text_to_arabic_digit
class ThaiWord:
    """Rewrite spelled-out Thai numbers in a token stream as Arabic numerals.

    The class keeps small vocabularies of Thai number words and uses them to
    find runs of consecutive number tokens (see ``pretty``), which are then
    converted with pythainlp's ``text_to_num`` / ``text_to_arabic_digit``.
    """

    def __init__(self) -> None:
        # Words for the values one to nine.
        self.word_number = ['หนึ่ง','สอง','สาม','สี่','ห้า','หก','เจ็ด','แปด','เก้า']
        # Place-value (unit) words: ten, hundred, thousand, ten-thousand,
        # hundred-thousand, million.
        self.word_digit = ['สิบ','ร้อย','พัน','หมื่น','แสน','ล้าน']
        # Irregular forms: index 0 is the "one" used in 11, 21, ...;
        # index 1 is the "two" used in 20-29.
        self.word_number_specific = ['เอ็ด', 'ยี่']
        # Not read by any method of this class; kept so existing callers that
        # access the attribute keep working.
        self.word_digit_specific = ['สิบ']

    def iscontains11(self, word) -> bool:
        """Return True if *word* starts or ends with the irregular "one" form."""
        irregular_one = self.word_number_specific[0]
        return word.endswith(irregular_one) or word.startswith(irregular_one)

    def iscontains2x(self, word) -> bool:
        """Return True if *word* starts with the irregular "two" form (20-29)."""
        return word.startswith(self.word_number_specific[1])

    def words_to_number(self, words) -> str:
        """Convert a run of Thai number tokens to display text.

        Args:
            words: sequence of tokens forming one spelled-out number.

        Returns:
            * the token itself when *words* is a single bare unit word
              (e.g. only "hundred"), since a lone unit is not a number;
            * ``' 1,234 '`` (thousands-separated, padded with one space on
              each side) when the joined tokens parse as an integer;
            * ``' <digits>'`` built token by token with
              ``text_to_arabic_digit`` when that parse fails.
        """
        if len(words) == 1 and words[0] in self.word_digit:
            # Return the word itself (a str), not the enclosing list, so the
            # caller does not end up embedding a list repr in its output.
            return words[0]

        try:
            # text_to_num is expected to return a list whose first item is
            # the numeric string; only that first item is used.
            parsed = text_to_num("".join(words))
            return f' {int(parsed[0]):,} '
        except Exception:
            # Best-effort fallback: any parse failure (empty result,
            # non-integer text, library error) falls back to reading the
            # tokens digit by digit. The result is built from scratch so no
            # partial value from the failed attempt leaks into it.
            digits = "".join(text_to_arabic_digit(word) for word in words)
            return f' {digits}'

    def pretty(self, tokens: deque) -> str:
        """Join *tokens* into text, replacing number-word runs with numerals.

        Tokens are consumed from the left of *tokens*, so the deque is empty
        when this method returns. A run starts at a token accepted by
        ``is_start_number`` and extends over following tokens accepted by
        ``is_number`` or ``is_digit``; each run is rendered through
        ``words_to_number``. All other tokens are copied through unchanged.
        """
        pieces = []
        pending = []  # tokens of the number run currently being collected
        while tokens:
            word = tokens.popleft()
            if pending:
                if self.is_number(word) or self.is_digit(word):
                    pending.append(word)
                    continue
                # The run ended on this token: emit it, then fall through so
                # the current token is still examined as a possible new start.
                pieces.append(self.words_to_number(pending))
                pending = []
            if self.is_start_number(word):
                pending.append(word)
            else:
                pieces.append(word)
        # Flush a run that reaches the end of the input.
        if pending:
            pieces.append(self.words_to_number(pending))
        return "".join(pieces)

    def is_start_number(self, word) -> bool:
        """Return True if *word* can begin a spelled-out number."""
        return (
            word in self.word_number
            or word in self.word_digit
            or self.iscontains2x(word)
            or self.iscontains11(word)
        )

    def is_digit(self, word) -> bool:
        """Return True if *word* is a place-value (unit) word."""
        return word in self.word_digit

    def is_number(self, word) -> bool:
        """Return True if *word* is a one-to-nine word or an irregular form."""
        return (
            word in self.word_number
            or word in self.word_number_specific
            or self.iscontains11(word)
        )