Alexis Palmer committed on
Commit
89609e8
1 Parent(s): c00c489

Ahorcado para Mapudungun, first upload

Browse files
Files changed (3) hide show
  1. app.py +138 -0
  2. mapudungun.easy.filtered +71 -0
  3. util.py +79 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import util
3
+ import re
4
+ import random
5
+
6
### load and prepare corpus
# Read the word list and normalize everything to lower case.
corpus = util.load_single_raw_text_file("mapudungun.easy.filtered").lower()

# Whitespace-separated entries are the candidate words.
words = corpus.split()

# Unique word types found in the corpus.
lexicon = set(words)

# Words eligible for the game. No length filter is applied at the moment
# (a 4-6 letter restriction was tried and disabled), so this is simply a
# copy of the full lexicon.
filtered_lexicon = set(lexicon)

print(len(filtered_lexicon))
31
+
32
+
33
def create_hangman_clue(word, guessed_letters):
    """
    Build the display clue for a word from the letters guessed so far.

    Every character of ``word`` contributes one slot followed by a single
    space: the character itself when it is in ``guessed_letters``, otherwise
    an underscore.

    For instance, the word 'apple' with guessed letters 'a' and 'l' gives
    'a _ _ l _ ' (note the trailing space).
    """
    slots = [ch if ch in guessed_letters else '_' for ch in word]
    return ''.join(slot + ' ' for slot in slots)
46
+
47
def free_hint(current_state):
    """
    Give the user a free hint by revealing one randomly selected letter.

    The chosen letter is added in place to ``current_state['guessed_letters']``
    and the updated clue string is returned. When every letter of the word
    has already been revealed there is nothing left to hint, so the state is
    left untouched and the current clue is returned.

    NOTE(review): revealing the final hidden letter through a hint does not
    trigger the "word completed" handling in ``hangman_game`` -- confirm
    whether that is the intended behavior.
    """
    word = current_state['word']
    guessed_letters = current_state['guessed_letters']

    # Sample only from positions that are still hidden. Keeping duplicates
    # preserves the per-position weighting of the earlier retry loop, while
    # avoiding an endless loop once no hidden letter remains.
    hidden = [letter for letter in word if letter not in guessed_letters]
    if hidden:
        guessed_letters.add(random.choice(hidden))

    return create_hangman_clue(word, guessed_letters)
61
+
62
+
63
def pick_new_word(lexicon):
    """
    Start a fresh round and return its game state.

    A word is drawn at random from ``lexicon``; the returned dict holds that
    word under 'word', an empty set under 'guessed_letters', and 6 under
    'remaining_chances'.
    """
    candidates = list(lexicon)
    chosen = random.choice(candidates)
    fresh_state = dict(word=chosen, guessed_letters=set(), remaining_chances=6)
    return fresh_state
71
+
72
+
73
def hangman_game(current_state, guess):
    """Update the current state based on the guess.

    Args:
        current_state: dict with the keys 'word', 'guessed_letters' (a set,
            mutated in place) and 'remaining_chances' (an int).
        guess: raw text entered by the player.

    Returns:
        A ``(state, message)`` tuple. ``state`` is the same dict while the
        round continues, or a brand-new state from ``pick_new_word`` once the
        round is won or lost. ``message`` is the feedback shown to the player.
    """
    # Normalize the raw input; surrounding whitespace is not part of a guess.
    guess = (guess or '').strip().lower()

    if len(guess) != 1:
        # Empty or multi-character input is not a legal guess. The empty
        # string must be rejected explicitly: '' in word is always True, so
        # it would otherwise be counted as a correct letter.
        return (current_state, 'Ingresa una sola letra - intenta nuevamente')

    if guess in current_state['guessed_letters']:
        # Repeated guess, do nothing
        return (current_state, 'Letra ya encontrada - intenta nuevamente')

    current_state['guessed_letters'].add(guess)

    if guess not in current_state['word']:
        # Wrong guess
        current_state['remaining_chances'] -= 1

        if current_state['remaining_chances'] <= 0:
            # No more chances! Reveal the word and start a new round.
            old_word = current_state['word']
            current_state = pick_new_word(filtered_lexicon)
            return (current_state, 'No quedan intentos. La palabra era: '+old_word)

        return (current_state, 'Tu letra no está en la palabra :(')

    # Right guess, check if there are any letters left to find.
    guessed = current_state['guessed_letters']
    if any(letter not in guessed for letter in current_state['word']):
        return (current_state, '¡Intento correcto!')

    # Every letter has been found: congratulate and start a new round.
    old_word = current_state['word']
    current_state = pick_new_word(filtered_lexicon)
    return (current_state, '😀¡Buen trabajo! La palabra era: '+ old_word)
108
+
109
+
110
def state_changed(current_state):
    """
    Derive the display values from the game state.

    Returns a tuple of (clue string, guessed letters, remaining chances),
    in that order.
    """
    letters = current_state['guessed_letters']
    return (
        create_hangman_clue(current_state['word'], letters),
        letters,
        current_state['remaining_chances'],
    )
115
+
116
+
117
# NOTE(review): the nesting below (three read-only textboxes in one row, the
# remaining widgets underneath) is reconstructed -- confirm against the
# intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Kintu Nemül") as hangman:
    # Game state: the word, the letters guessed so far and the chances left.
    current_word = gr.State(pick_new_word(filtered_lexicon))
    initial_state = current_word.value

    gr.Markdown("# Kintu Nemül")

    with gr.Row():
        current_word_textbox = gr.Textbox(label="La palabra", interactive=False, value=create_hangman_clue(initial_state['word'], initial_state['guessed_letters']))
        guessed_letters_textbox = gr.Textbox(label="Letras encontradas", interactive=False)
        # Read the starting value from the state instead of repeating the
        # literal, so the display cannot drift from pick_new_word.
        remaining_chances_textbox = gr.Textbox(label="Intentos restantes", interactive=False, value=initial_state['remaining_chances'])

    guess_textbox = gr.Textbox(label="Adivina la letra y luego aprieta en 'Enviar'")
    guess_button = gr.Button(value="Enviar")
    hint_button = gr.Button(value="Aprieta acá para obtener una pista")

    output_textbox = gr.Textbox(label="Resultado", interactive=False)

    # Widgets that mirror the game state and are refreshed after each action.
    state_outputs = [current_word_textbox, guessed_letters_textbox, remaining_chances_textbox]

    guess_button.click(fn=hangman_game, inputs=[current_word, guess_textbox], outputs=[current_word, output_textbox])\
        .then(fn=state_changed, inputs=[current_word], outputs=state_outputs)
    # A hint adds a letter to the guessed set, so refresh every state widget
    # afterwards; otherwise "Letras encontradas" would not show the hinted letter.
    hint_button.click(fn=free_hint, inputs=[current_word], outputs=[current_word_textbox])\
        .then(fn=state_changed, inputs=[current_word], outputs=state_outputs)

hangman.launch(share=True)
mapudungun.easy.filtered ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diwlliñ
2
+ chod
3
+ kompañ
4
+ ñukekantu
5
+ kallfü
6
+ chaw
7
+ epu
8
+ mellfü
9
+ küla
10
+ kawellu
11
+ wün'
12
+ üñüm
13
+ kinchika
14
+ namün'
15
+ kiñe
16
+ kelüchod
17
+ mari
18
+ kal'
19
+ pütra
20
+ yu
21
+ lig
22
+ ufisa
23
+ pangi
24
+ ñuke
25
+ challwa
26
+ chawkantu
27
+ karü
28
+ kadü
29
+ milla
30
+ kurü
31
+ küwü
32
+ kolü
33
+ kelü
34
+ waka
35
+ dewü
36
+ ñarki
37
+ kapüra
38
+ llampüdken
39
+ llalliñ
40
+ fallke
41
+ kechu
42
+ pilun
43
+ ligkelü
44
+ aylla
45
+ pura
46
+ paine
47
+ nawel
48
+ awawe
49
+ pel'
50
+ mañke
51
+ trulitruli
52
+ longko
53
+ wala
54
+ trewa
55
+ achawall
56
+ meli
57
+ chang
58
+ regle
59
+ kayu
60
+ foro
61
+ lien
62
+ filu
63
+ kewün'
64
+ peyu
65
+ rukü
66
+ luku
67
+ furi
68
+ lipang
69
+ koneku
70
+ nge
71
+ sañwe
util.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import unicodedata
4
+
5
def strip_accents(text: str) -> str:
    """Removes accents from text.

    The text is decomposed with Unicode NFD normalization and every
    character whose category is 'Mn' (nonspacing mark) is dropped.
    """
    decomposed = unicodedata.normalize('NFD', text)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
9
+
10
+
11
def load_raw_text(corpus_directory: str, file_names=None) -> str:
    """Loads all the text files in a directory into one large string.

    Args:
        corpus_directory: directory to read from (sub-directories are skipped).
        file_names: optional iterable of file names; when given, only the
            ``.txt`` files whose names appear in it are loaded. ``None``
            (the default) loads every ``.txt`` file.

    Returns:
        The contents of the selected files, each followed by a newline,
        concatenated in sorted file-name order.
    """
    wanted = None if file_names is None else set(file_names)
    pieces = []

    # Sort so the corpus order does not depend on the file system's
    # arbitrary listing order.
    for file_name in sorted(os.listdir(corpus_directory)):
        if wanted is not None and file_name not in wanted:
            continue

        file_path = os.path.join(corpus_directory, file_name)
        if os.path.isdir(file_path):
            continue

        # Make sure we only read text files; test the suffix so names such
        # as "notes.txt.bak" are not picked up.
        if not file_name.endswith(".txt"):
            continue

        # Read as UTF-8 explicitly rather than the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            pieces.append(file.read() + "\n")

    return "".join(pieces)
29
+
30
def load_single_raw_text_file(file_name):
    """Loads a single text file into one large string.

    The file is read as UTF-8 (instead of the platform default encoding) and
    a trailing newline is appended to its contents.
    """
    with open(file_name, 'r', encoding='utf-8') as file:
        return file.read() + "\n"
39
+
40
+
41
# A token is a run of word characters and apostrophes (e.g. "wün'").
# Inside a character class "|" is a literal character, not alternation, so it
# must not be listed here or "a|b" would be kept as a single token.
word_regex = r"[\w']+"


def tokenize(text):
    """Return every run of word characters/apostrophes in ``text``, in order."""
    return re.findall(word_regex, text)
44
+
45
+
46
def preprocess(text):
    """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation.

    The text is accent-stripped and lower-cased, split on single spaces, and
    only tokens that begin with a word character, an apostrophe, or one of
    ``. , ? !`` are kept.

    NOTE(review): the filter anchors at the start of each token only, so a
    token such as "abc$" is still kept -- confirm whether a whole-token match
    is wanted.
    """
    text = strip_accents(text)
    text = text.lower()

    tokens = text.split(" ")

    # "|" is a literal inside a character class, so it is left out of both
    # classes; otherwise tokens starting with "|" would pass the filter.
    allowed_start = re.compile(r"[\w']+|[.,?!]")

    # Skip any tokens that start with a special character (and empty tokens
    # produced by consecutive spaces).
    return [token for token in tokens if allowed_start.match(token)]
59
+
60
+
61
def pad(text: list, num_padding: int):
    """Pads the given text, as a list of strings, with <s> tokens between sentences.

    ``num_padding`` copies of "<s>" are placed at the very start, and again
    after every end-of-sentence punctuation token.
    """
    boundary = ["<s>"] * num_padding

    # End punctuation that closes a sentence.
    # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
    sentence_enders = (".", "?", "!")

    # Initial padding in front of the first sentence.
    padded_text = list(boundary)

    for token in text:
        padded_text.append(token)
        # Padding goes right after each end punctuation mark.
        if token in sentence_enders:
            padded_text.extend(boundary)

    return padded_text