from collections import deque

from pythainlp.util import text_to_num, text_to_arabic_digit


class ThaiWord:
    """Rewrite runs of Thai number words in a token stream as Arabic numerals.

    The vocabulary lists below drive all detection:

    * ``word_number``          -- the words for one through nine.
    * ``word_digit``           -- place-value/unit words (ten ... million).
    * ``word_number_specific`` -- irregular forms: index 0 is the form
      checked by :meth:`iscontains11`, index 1 the prefix checked by
      :meth:`iscontains2x`.
    * ``word_digit_specific``  -- kept for callers; not read by this class.
    """

    def __init__(self) -> None:
        self.word_number = ['หนึ่ง', 'สอง', 'สาม', 'สี่', 'ห้า', 'หก', 'เจ็ด', 'แปด', 'เก้า']
        self.word_digit = ['สิบ', 'ร้อย', 'พัน', 'หมื่น', 'แสน', 'ล้าน']
        self.word_number_specific = ['เอ็ด', 'ยี่']
        self.word_digit_specific = ['สิบ']

    def iscontains11(self, word: str) -> bool:
        """Return True if *word* starts or ends with ``word_number_specific[0]``."""
        marker = self.word_number_specific[0]
        return word.endswith(marker) or word.startswith(marker)

    def iscontains2x(self, word: str) -> bool:
        """Return True if *word* starts with ``word_number_specific[1]``."""
        return word.startswith(self.word_number_specific[1])

    def words_to_number(self, words: list) -> str:
        """Convert a run of Thai number words into display text.

        Args:
            words: The consecutive number/unit tokens collected by
                :meth:`pretty`.

        Returns:
            * ``''`` for an empty run.
            * The word itself, unchanged, when the run is a single unit
              word from ``word_digit`` (a bare unit is treated as text).
            * ``' 1,234 '`` (comma-grouped, space padded on both sides) when
              the joined words parse to an integer.
            * ``' 123'`` (leading space only) as a digit-by-digit fallback
              when integer parsing fails.
        """
        if not words:
            return ''

        # A lone unit word is left as text. Return the string itself, not the
        # enclosing list, so callers can concatenate the result.
        if len(words) == 1 and words[0] in self.word_digit:
            return words[0]

        try:
            # NOTE(review): text_to_num is expected to return a list whose
            # first item is the numeric string -- confirm against PyThaiNLP.
            parsed = text_to_num(''.join(words))
            return f' {int(parsed[0]):,} '
        except Exception:
            # Deliberate best-effort: any parsing failure (empty result,
            # non-integer text, library error) falls back to converting each
            # word on its own. The fallback is built from scratch so that no
            # partial result from the failed attempt leaks into the output.
            digits = ''.join(text_to_arabic_digit(word) for word in words)
            return f' {digits}'

    def pretty(self, tokens: deque) -> str:
        """Join *tokens* into text, replacing number-word runs with numerals.

        Args:
            tokens: Word tokens in sentence order. The deque is consumed
                (emptied) by this call.

        Returns:
            The concatenated text, with each detected run of number words
            replaced by the result of :meth:`words_to_number`.
        """
        parts = []
        pending = []  # number words of the run currently being collected

        while tokens:
            word = tokens.popleft()

            if pending:
                if self.is_number(word) or self.is_digit(word):
                    pending.append(word)
                    continue
                # The run ended on this word: emit it, then fall through so
                # the same word can be emitted as text or start a new run.
                parts.append(self.words_to_number(pending))
                pending = []

            if self.is_start_number(word):
                pending.append(word)
            else:
                parts.append(word)

        # Flush a run that reached the end of the input.
        if pending:
            parts.append(self.words_to_number(pending))

        return ''.join(parts)

    def is_start_number(self, word: str) -> bool:
        """Return True if *word* can open a run of number words."""
        return (
            word in self.word_number
            or word in self.word_digit
            or self.iscontains2x(word)
            or self.iscontains11(word)
        )

    def is_digit(self, word: str) -> bool:
        """Return True if *word* is one of the unit words in ``word_digit``."""
        return word in self.word_digit

    def is_number(self, word: str) -> bool:
        """Return True if *word* can continue a run as a number word."""
        return (
            word in self.word_number
            or word in self.word_number_specific
            or self.iscontains11(word)
        )