Spaces:
Sleeping
Sleeping
Alexis Palmer
committed on
Commit
•
89609e8
1
Parent(s):
c00c489
Ahorcado para Mapudungun, first upload
Browse files- app.py +138 -0
- mapudungun.easy.filtered +71 -0
- util.py +79 -0
app.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import util
|
3 |
+
import re
|
4 |
+
import random
|
5 |
+
|
6 |
+
### Load and prepare the corpus
# The corpus is read as one string and lower-cased so guesses can be compared
# case-insensitively.
corpus = util.load_single_raw_text_file("mapudungun.easy.filtered")

corpus = corpus.lower()

# Split on whitespace rather than with a letter-only regex, so characters such
# as "ü", "ñ" and "'" stay inside the words they belong to.
words = corpus.split()

# Distinct words found in the corpus.
lexicon = set(words)

# Words the game may pick. No length filter is applied, so this is a plain
# copy of the lexicon kept under its own name.
filtered_lexicon = set(lexicon)

print(len(filtered_lexicon))
|
31 |
+
|
32 |
+
|
33 |
+
def create_hangman_clue(word, guessed_letters):
    """
    Given a word and a collection of guessed letters, create the clue.

    Every character of `word` that is in `guessed_letters` is shown as itself;
    every other character is shown as '_'. Positions are separated by a single
    space and the result has no trailing space. An empty word gives ''.

    For instance, if the word is 'apple' and the guessed letters are 'a' and
    'l', the clue is 'a _ _ l _'.
    """
    return ' '.join(
        letter if letter in guessed_letters else '_'
        for letter in word
    )
|
46 |
+
|
47 |
+
def free_hint(current_state):
    """
    Give the user a free hint by revealing one randomly-selected letter.

    The chosen letter is added to current_state['guessed_letters'] in place
    and the updated clue is returned. The remaining chances are not touched.
    If every letter of the word is already revealed there is nothing left to
    give, and the current clue is returned unchanged.
    """
    word = current_state['word']
    guessed_letters = current_state['guessed_letters']

    # Draw only from the letters that are still hidden. Re-drawing from the
    # whole word until an unguessed letter appears would loop forever once the
    # word is fully revealed. Repeated letters stay in the list, so a letter
    # that occurs more often is still more likely to be picked.
    hidden_letters = [letter for letter in word if letter not in guessed_letters]
    if hidden_letters:
        guessed_letters.add(random.choice(hidden_letters))

    return create_hangman_clue(word, guessed_letters)
|
61 |
+
|
62 |
+
|
63 |
+
def pick_new_word(lexicon, remaining_chances=6):
    """
    Build a fresh game state around a word drawn at random from `lexicon`.

    Args:
        lexicon: iterable of candidate words (a set elsewhere in this file).
        remaining_chances: value stored under 'remaining_chances' in the new
            state; defaults to 6.

    Returns:
        A dict with the keys 'word', 'guessed_letters' (a new empty set) and
        'remaining_chances'.

    Raises:
        IndexError: if `lexicon` is empty (raised by random.choice).
    """
    # random.choice needs an indexable sequence, and sets are not indexable.
    candidates = list(lexicon)

    return {
        'word': random.choice(candidates),
        'guessed_letters': set(),
        'remaining_chances': remaining_chances,
    }
|
71 |
+
|
72 |
+
|
73 |
+
def hangman_game(current_state, guess):
    """Update the current state based on the guess.

    Args:
        current_state: dict with the keys 'word', 'guessed_letters' (a set)
            and 'remaining_chances'. It is modified in place for a legal guess.
        guess: text typed by the player. Surrounding whitespace is ignored and
            the comparison is case-insensitive.

    Returns:
        A (state, message) tuple. The state is the same dict that was passed
        in, unless the round ended (word completed or chances used up), in
        which case it is a new state from pick_new_word(filtered_lexicon).
    """
    guess = guess.strip().lower()

    # Exactly one character is a legal guess. The empty string must be
    # rejected explicitly: '' is "in" every string, so it would otherwise be
    # scored as a correct guess.
    if len(guess) != 1:
        return (current_state, 'Ingresa una sola letra - intenta nuevamente')

    if guess in current_state['guessed_letters']:
        # Repeated guess, do nothing
        return (current_state, 'Letra ya encontrada - intenta nuevamente')

    current_state['guessed_letters'].add(guess)

    if guess not in current_state['word']:
        # Wrong guess
        current_state['remaining_chances'] -= 1

        if current_state['remaining_chances'] == 0:
            # No more chances! Reveal the word and start over with a new one.
            old_word = current_state['word']
            current_state = pick_new_word(filtered_lexicon)
            return (current_state, 'No quedan intentos. La palabra era: '+old_word)

        return (current_state, 'Tu letra no está en la palabra :(')

    # Right guess, check if there are any letters left to find
    letters_remaining = any(
        letter not in current_state['guessed_letters']
        for letter in current_state['word']
    )
    if letters_remaining:
        return (current_state, '¡Intento correcto!')

    # The whole word is revealed: congratulate and start over with a new one.
    old_word = current_state['word']
    current_state = pick_new_word(filtered_lexicon)
    return (current_state, '😀¡Buen trabajo! La palabra era: '+ old_word)
|
108 |
+
|
109 |
+
|
110 |
+
def state_changed(current_state):
    """
    Turn a game state into the values shown on screen.

    Returns a (clue, guessed_letters, remaining_chances) tuple, where the clue
    is built from the state's word and guessed letters and the other two
    entries are taken from the state as they are.
    """
    secret = current_state['word']
    letters_so_far = current_state['guessed_letters']

    return (
        create_hangman_clue(secret, letters_so_far),
        letters_so_far,
        current_state['remaining_chances'],
    )
|
115 |
+
|
116 |
+
|
117 |
+
# Build the Gradio interface for the game.
# NOTE(review): indentation was not visible in the reviewed copy of this file;
# the nesting below (three read-only textboxes inside the Row, launch() outside
# the Blocks context) is reconstructed from the blank lines - confirm.
with gr.Blocks(theme=gr.themes.Soft(), title="Kintu Nemül") as hangman:
    # State component holding the game-state dict that the callbacks below
    # receive as input and return as output.
    current_word = gr.State(pick_new_word(filtered_lexicon))

    gr.Markdown("# Kintu Nemül")

    with gr.Row():
        # Read-only displays. The clue is initialised from the state's starting
        # value, and 6 matches the 'remaining_chances' set by pick_new_word.
        current_word_textbox = gr.Textbox(label="La palabra", interactive=False, value=create_hangman_clue(current_word.value['word'], current_word.value['guessed_letters']))
        guessed_letters_textbox = gr.Textbox(label="Letras encontradas", interactive=False)
        remaining_chances_textbox = gr.Textbox(label="Intentos restantes", interactive=False, value=6)

    # Player input: the guess box, the submit button and the hint button.
    guess_textbox = gr.Textbox(label="Adivina la letra y luego aprieta en 'Enviar'")
    guess_button = gr.Button(value="Enviar")
    hint_button = gr.Button(value="Aprieta acá para obtener una pista")

    # Feedback message returned by hangman_game.
    output_textbox = gr.Textbox(label="Resultado", interactive=False)


    # A guess first updates the state and the feedback message, then the three
    # read-only displays are refreshed from the new state.
    guess_button.click(fn=hangman_game, inputs=[current_word, guess_textbox], outputs=[current_word, output_textbox])\
        .then(fn=state_changed, inputs=[current_word], outputs=[current_word_textbox, guessed_letters_textbox, remaining_chances_textbox])
    # A hint only refreshes the clue textbox; the guessed-letters display is
    # not updated by this event.
    hint_button.click(fn=free_hint, inputs=[current_word], outputs=[current_word_textbox])

# NOTE(review): share=True presumably requests a public share link from Gradio;
# confirm it is wanted where this app is hosted.
hangman.launch(share=True)
|
mapudungun.easy.filtered
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
diwlliñ
|
2 |
+
chod
|
3 |
+
kompañ
|
4 |
+
ñukekantu
|
5 |
+
kallfü
|
6 |
+
chaw
|
7 |
+
epu
|
8 |
+
mellfü
|
9 |
+
küla
|
10 |
+
kawellu
|
11 |
+
wün'
|
12 |
+
üñüm
|
13 |
+
kinchika
|
14 |
+
namün'
|
15 |
+
kiñe
|
16 |
+
kelüchod
|
17 |
+
mari
|
18 |
+
kal'
|
19 |
+
pütra
|
20 |
+
yu
|
21 |
+
lig
|
22 |
+
ufisa
|
23 |
+
pangi
|
24 |
+
ñuke
|
25 |
+
challwa
|
26 |
+
chawkantu
|
27 |
+
karü
|
28 |
+
kadü
|
29 |
+
milla
|
30 |
+
kurü
|
31 |
+
küwü
|
32 |
+
kolü
|
33 |
+
kelü
|
34 |
+
waka
|
35 |
+
dewü
|
36 |
+
ñarki
|
37 |
+
kapüra
|
38 |
+
llampüdken
|
39 |
+
llalliñ
|
40 |
+
fallke
|
41 |
+
kechu
|
42 |
+
pilun
|
43 |
+
ligkelü
|
44 |
+
aylla
|
45 |
+
pura
|
46 |
+
paine
|
47 |
+
nawel
|
48 |
+
awawe
|
49 |
+
pel'
|
50 |
+
mañke
|
51 |
+
trulitruli
|
52 |
+
longko
|
53 |
+
wala
|
54 |
+
trewa
|
55 |
+
achawall
|
56 |
+
meli
|
57 |
+
chang
|
58 |
+
regle
|
59 |
+
kayu
|
60 |
+
foro
|
61 |
+
lien
|
62 |
+
filu
|
63 |
+
kewün'
|
64 |
+
peyu
|
65 |
+
rukü
|
66 |
+
luku
|
67 |
+
furi
|
68 |
+
lipang
|
69 |
+
koneku
|
70 |
+
nge
|
71 |
+
sañwe
|
util.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import unicodedata
|
4 |
+
|
5 |
+
def strip_accents(text: str) -> str:
    """Return `text` with its accent marks removed.

    The text is decomposed with Unicode NFD normalisation and every character
    in category 'Mn' is dropped. The result is not recomposed afterwards.
    """
    decomposed = unicodedata.normalize('NFD', text)
    kept = [char for char in decomposed if unicodedata.category(char) != 'Mn']
    return ''.join(kept)
|
9 |
+
|
10 |
+
|
11 |
+
def load_raw_text(corpus_directory: str, file_names=None) -> str:
    """Loads the text files in a directory into one large string.

    Args:
        corpus_directory: directory to read from. Sub-directories are skipped;
            the scan is not recursive.
        file_names: optional collection of file names to restrict loading to.
            When None (the default), every ".txt" file in the directory is
            loaded.

    Returns:
        The contents of each selected file, each followed by a newline,
        concatenated in sorted file-name order. Files are read as UTF-8.
    """
    wanted = None if file_names is None else set(file_names)
    pieces = []

    # os.listdir gives no ordering guarantee; sorting keeps the corpus stable
    # from one run to the next.
    for file_name in sorted(os.listdir(corpus_directory)):
        if wanted is not None and file_name not in wanted:
            continue

        file_path = os.path.join(corpus_directory, file_name)
        if os.path.isdir(file_path):
            continue

        # Make sure we only read text files. Checking the suffix (not just a
        # substring) keeps out names such as "notes.txt.bak".
        if not file_name.endswith(".txt"):
            continue

        # Explicit encoding so the result does not depend on the platform
        # default.
        with open(file_path, 'r', encoding='utf-8') as file:
            pieces.append(file.read() + "\n")

    return "".join(pieces)
|
29 |
+
|
30 |
+
def load_single_raw_text_file(file_name):
    """Loads a single text file into one large string.

    The file is read as UTF-8, so the result does not depend on the platform's
    default encoding. A newline is appended after the file's contents.

    Raises:
        OSError: if the file cannot be opened (raised by open).
    """
    with open(file_name, 'r', encoding='utf-8') as file:
        return file.read() + "\n"
|
39 |
+
|
40 |
+
|
41 |
+
# A token is a run of word characters and apostrophes. Inside a character
# class "|" is a literal pipe, not alternation, so it must not be listed here
# or pipes would be kept as part of words.
word_regex = r"[\w']+"
def tokenize(text):
    """Return every token in `text`, in order of appearance.

    Tokens are the non-overlapping matches of `word_regex`; anything else
    (spaces, punctuation other than the apostrophe) separates tokens.
    """
    return re.findall(word_regex, text)
|
44 |
+
|
45 |
+
|
46 |
+
def preprocess(text):
    """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation.

    Accents are stripped and the text is lower-cased, then it is split on
    single spaces. A token is kept when it starts with a word character or an
    apostrophe, or with one of the marks . , ? !

    Returns:
        The list of kept tokens, in their original order.
    """
    text = strip_accents(text)
    text = text.lower()

    # "|" is left out of the character classes on purpose: inside [...] it is
    # a literal pipe, which would let tokens that start with "|" through.
    allowed_start = re.compile(r"[\w']+|[.,?!]")

    # Skip any tokens that start with a special character (this also drops the
    # empty tokens produced by consecutive spaces).
    # NOTE: match() only anchors at the start of the token, so a token that
    # begins acceptably is kept whole even if other characters follow.
    return [token for token in text.split(" ") if allowed_start.match(token)]
|
59 |
+
|
60 |
+
|
61 |
+
def pad(text: list, num_padding: int):
    """Return a copy of `text` (a list of strings) with "<s>" padding added.

    `num_padding` "<s>" tokens are placed at the very start, and again after
    every sentence-ending punctuation token.
    """
    boundary = ["<s>"] * num_padding

    # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
    sentence_enders = (".", "?", "!")

    # Initial padding in front of the first sentence
    padded_text = list(boundary)

    for token in text:
        padded_text.append(token)

        # An end punctuation mark closes a sentence: add the padding after it
        if token in sentence_enders:
            padded_text.extend(boundary)

    return padded_text
|