Spaces:
Runtime error
Runtime error
File size: 3,095 Bytes
79be08a 2f990e6 79be08a 2f990e6 79be08a 2f990e6 79be08a 1b3124c 79be08a 2f990e6 79be08a 1b3124c 79be08a 2f990e6 79be08a 1b3124c 79be08a 2f990e6 79be08a 2f990e6 79be08a 2f990e6 79be08a 1b3124c 2f990e6 79be08a 2f990e6 79be08a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
from pythainlp.util import text_to_num, text_to_arabic_digit
from collections import deque
class ThaiWord:
    """Rewrite spelled-out Thai numbers in a token stream as Arabic numerals.

    The class keeps small vocabularies of Thai number words and uses them to
    find runs of consecutive number tokens, which are then converted with
    the ``pythainlp.util`` helpers imported at the top of this file.
    """

    def __init__(self) -> None:
        # Words for the values one through nine.
        self.word_number = ['หนึ่ง','สอง','สาม','สี่','ห้า','หก','เจ็ด','แปด','เก้า']
        # Place-value (unit) words: ten, hundred, thousand, ten-thousand,
        # hundred-thousand, million.
        self.word_digit = ['สิบ','ร้อย','พัน','หมื่น','แสน','ล้าน']
        # Irregular forms: index 0 is the "one" checked by iscontains11,
        # index 1 is the "two" prefix checked by iscontains2x.
        self.word_number_specific = ['เอ็ด', 'ยี่']
        # Not read by any method of this class; kept for external users.
        self.word_digit_specific = ['สิบ']

    def iscontains11(self, word) -> bool:
        """Return True if *word* starts or ends with the irregular 'one'."""
        irregular_one = self.word_number_specific[0]
        return word.endswith(irregular_one) or word.startswith(irregular_one)

    def iscontains2x(self, word) -> bool:
        """Return True if *word* starts with the irregular 'two' prefix."""
        return word.startswith(self.word_number_specific[1])

    def words_to_number(self, words) -> str:
        """Convert a run of Thai number tokens to display text.

        Args:
            words: Sequence of tokens that together spell one number.

        Returns:
            * The token itself, unchanged, when the run is a single
              place-value word (a bare unit is kept as text).
            * ``' 1,234 '`` (thousands-separated, space padded) when
              ``text_to_num`` yields a value that parses as an integer.
            * ``' <digits>'`` built token by token with
              ``text_to_arabic_digit`` when that conversion fails.
        """
        if len(words) == 1 and words[0] in self.word_digit:
            # Return the word, not the list: callers interpolate the result
            # into text and the list's repr would leak into the output.
            return words[0]

        try:
            parsed = text_to_num("".join(words))
            # Only the first number found in the joined text is used; an
            # empty result raises IndexError and takes the fallback path.
            value = int(parsed[0])
        except Exception:
            # Best-effort fallback, deliberately broad because the failure
            # modes of the library call are not visible from here. Start from
            # an empty string so no partial result from the failed attempt
            # ends up in the output.
            digits = "".join(text_to_arabic_digit(word) for word in words)
            return f' {digits}'
        return f' {value:,} '

    def pretty(self, tokens: deque) -> str:
        """Join *tokens* into text, replacing number-word runs with numerals.

        Args:
            tokens: Deque of word tokens. It is consumed: the deque is empty
                when this method returns.

        Returns:
            The concatenated text, with every run of number tokens replaced
            by the result of :meth:`words_to_number`.
        """
        pieces = []
        pending = []  # tokens of the number run currently being collected

        while tokens:
            word = tokens.popleft()

            if pending:
                if self.is_number(word) or self.is_digit(word):
                    pending.append(word)
                    continue
                # The run ended: emit it, then handle ``word`` below.
                pieces.append(self.words_to_number(pending))
                pending = []

            # Outside a run: either this token starts one or it is plain text.
            if self.is_start_number(word):
                pending.append(word)
            else:
                pieces.append(word)

        # Flush a run that reaches the end of the input.
        if pending:
            pieces.append(self.words_to_number(pending))

        return "".join(pieces)

    def is_start_number(self, word) -> bool:
        """Return True if *word* can begin a run of number tokens."""
        return (
            word in self.word_number
            or word in self.word_digit
            or self.iscontains2x(word)
            or self.iscontains11(word)
        )

    def is_digit(self, word) -> bool:
        """Return True if *word* is a place-value (unit) word."""
        return word in self.word_digit

    def is_number(self, word) -> bool:
        """Return True if *word* is a value word or an irregular form."""
        return (
            word in self.word_number
            or word in self.word_number_specific
            or self.iscontains11(word)
        )
|