Spaces:

TDLI2024
/

kintu_nemul

Sleeping

App Files Files Community

Alexis Palmer committed on Jan 12

Commit

89609e8

•

1 Parent(s): c00c489

Ahorcado para Mapudungun, first upload

Browse files

Files changed (3) hide show

app.py +138 -0
mapudungun.easy.filtered +71 -0
util.py +79 -0

app.py ADDED Viewed

	@@ -0,0 +1,138 @@

+import gradio as gr
+import util
+import re
+import random
### Load and prepare the corpus
# The word list is read as a single string and lower-cased; guesses are
# lower-cased as well before they are compared against these words.
corpus = util.load_single_raw_text_file("mapudungun.easy.filtered")
corpus = corpus.lower()
# Split on whitespace (not on an ASCII-only regex) so that letters such as
# "ñ" / "ü" and the apostrophes inside words stay part of each word.
words = corpus.split()
# Unique word forms found in the corpus.
lexicon = set(words)
# Words the game may pick from. Every word is currently kept; a length filter
# (4 <= len(word) <= 6) was sketched here earlier but is not applied.
filtered_lexicon = set(lexicon)
print(len(filtered_lexicon))
def create_hangman_clue(word, guessed_letters):
    """Build the clue string shown to the player.

    Each letter of ``word`` that is contained in ``guessed_letters`` is
    revealed; every other position is rendered as an underscore. Every
    position is followed by one space, so a non-empty clue ends with a
    trailing space.

    For instance, if the word is 'apple' and the guessed letters are 'a' and
    'l', the clue is 'a _ _ l _ '.

    Args:
        word: The secret word.
        guessed_letters: Container of letters guessed so far (anything that
            supports ``in``).

    Returns:
        The clue string; '' when ``word`` is empty.
    """
    # A single join avoids rebuilding the string once per letter.
    return ''.join(
        (letter if letter in guessed_letters else '_') + ' '
        for letter in word
    )
def free_hint(current_state):
    """Give the user a free hint by revealing one randomly-selected letter.

    The chosen letter is added to ``current_state['guessed_letters']`` in
    place. When every letter of the word has already been guessed there is
    nothing left to reveal, so the state is left untouched and the current
    clue is returned (the previous retry loop would never terminate in that
    situation).

    Args:
        current_state: Game state dict with the keys 'word' and
            'guessed_letters' (a set).

    Returns:
        The clue string for the word after the hint has been applied.
    """
    word = current_state['word']
    guessed_letters = current_state['guessed_letters']
    # Candidate positions are the still-hidden ones; repeated letters stay
    # repeated so a letter's chance is proportional to how often it occurs,
    # as with drawing a random position and retrying on revealed letters.
    hidden = [letter for letter in word if letter not in guessed_letters]
    if hidden:
        guessed_letters.add(random.choice(hidden))
    return create_hangman_clue(word, guessed_letters)
def pick_new_word(lexicon, chances=6):
    """Create a fresh game state for a randomly chosen word.

    Args:
        lexicon: Non-empty iterable of candidate words (a set is fine; it is
            copied into a list because ``random.choice`` needs a sequence).
        chances: Number of wrong guesses allowed for the new word.
            Defaults to 6.

    Returns:
        A dict with the keys 'word' (the chosen word), 'guessed_letters'
        (a new empty set) and 'remaining_chances' (``chances``).

    Raises:
        IndexError: If ``lexicon`` is empty (raised by ``random.choice``).
    """
    candidates = list(lexicon)
    return {
        'word': random.choice(candidates),
        'guessed_letters': set(),
        'remaining_chances': chances,
    }
def hangman_game(current_state, guess):
    """Update the current state based on the guess.

    The guess is stripped of surrounding whitespace and lower-cased. It must
    be exactly one character long; an empty or multi-character guess is
    rejected without touching the state (an empty string would otherwise be
    counted as a correct guess, because '' is "in" every word).

    Args:
        current_state: Game state dict with the keys 'word',
            'guessed_letters' (a set, mutated in place) and
            'remaining_chances'.
        guess: The text the player entered.

    Returns:
        A ``(state, message)`` tuple. ``state`` is ``current_state`` while the
        round continues, or a fresh state from ``pick_new_word`` once the
        word is solved or the chances are used up. ``message`` is the
        user-facing result text.
    """
    guess = guess.strip().lower()
    if len(guess) != 1:
        # Illegal guess (empty or more than one character), do nothing
        return (current_state, 'Ingresa una sola letra - intenta nuevamente')
    if guess in current_state['guessed_letters']:
        # Repeated guess, do nothing
        return (current_state, 'Letra ya encontrada - intenta nuevamente')
    current_state['guessed_letters'].add(guess)
    word = current_state['word']
    if guess not in word:
        # Wrong guess
        current_state['remaining_chances'] -= 1
        if current_state['remaining_chances'] <= 0:
            # No more chances! New word
            return (pick_new_word(filtered_lexicon),
                    'No quedan intentos. La palabra era: ' + word)
        return (current_state, 'Tu letra no está en la palabra :(')
    # Right guess, check if there's any letters left
    if all(letter in current_state['guessed_letters'] for letter in word):
        # Nothing left to find: the word is solved, start a new one.
        return (pick_new_word(filtered_lexicon),
                '😀¡Buen trabajo! La palabra era: ' + word)
    return (current_state, '¡Intento correcto!')
def state_changed(current_state):
    """Compute the values shown in the three read-only display fields.

    Args:
        current_state: Game state dict with the keys 'word',
            'guessed_letters' and 'remaining_chances'.

    Returns:
        A ``(clue, guessed_letters, remaining_chances)`` tuple. The guessed
        letters are returned as a sorted, space-separated string so that a
        text field shows e.g. ``a k u`` instead of the Python set repr
        (``{'a', 'k', 'u'}`` or ``set()``); it is '' when nothing has been
        guessed yet.
    """
    guessed = current_state['guessed_letters']
    clue = create_hangman_clue(current_state['word'], guessed)
    guessed_display = ' '.join(sorted(guessed))
    return (clue, guessed_display, current_state['remaining_chances'])
# Build the Gradio interface for the hangman game and launch it.
with gr.Blocks(theme=gr.themes.Soft(), title="Kintu Nemül") as hangman:
    # Game state dict (word, guessed letters, remaining chances) produced by
    # pick_new_word; it is passed to and returned from the event handlers.
    current_word = gr.State(pick_new_word(filtered_lexicon))
    gr.Markdown("# Kintu Nemül")
    with gr.Row():
        # Read-only displays: the clue for the initial word, the letters
        # guessed so far, and the number of chances left.
        current_word_textbox = gr.Textbox(label="La palabra", interactive=False, value=create_hangman_clue(current_word.value['word'], current_word.value['guessed_letters']))
        guessed_letters_textbox = gr.Textbox(label="Letras encontradas", interactive=False)
        # NOTE(review): the initial 6 duplicates the 'remaining_chances' value
        # set in pick_new_word; keep the two in sync.
        remaining_chances_textbox = gr.Textbox(label="Intentos restantes", interactive=False, value=6)
    # Player input and controls.
    guess_textbox = gr.Textbox(label="Adivina la letra y luego aprieta en 'Enviar'")
    guess_button = gr.Button(value="Enviar")
    hint_button = gr.Button(value="Aprieta acá para obtener una pista")
    output_textbox = gr.Textbox(label="Resultado", interactive=False)
    # A guess first runs hangman_game (updates the state and the result
    # message), then state_changed refreshes the three display fields.
    guess_button.click(fn=hangman_game, inputs=[current_word, guess_textbox], outputs=[current_word, output_textbox])\
                .then(fn=state_changed, inputs=[current_word], outputs=[current_word_textbox, guessed_letters_textbox, remaining_chances_textbox])
    # The hint handler only writes the clue box; the guessed-letters box is
    # not among its outputs and the state is not listed as an output either.
    # NOTE(review): presumably the letter added by free_hint persists through
    # in-place mutation of the state — confirm against Gradio's State handling.
    hint_button.click(fn=free_hint, inputs=[current_word], outputs=[current_word_textbox])
# NOTE(review): share=True asks Gradio for a public share link; presumably
# unnecessary when the app is hosted as a Space — confirm.
hangman.launch(share=True)

mapudungun.easy.filtered ADDED Viewed

	@@ -0,0 +1,71 @@

+diwlliñ
+chod
+kompañ
+ñukekantu
+kallfü
+chaw
+epu
+mellfü
+küla
+kawellu
+wün'
+üñüm
+kinchika
+namün'
+kiñe
+kelüchod
+mari
+kal'
+pütra
+yu
+lig
+ufisa
+pangi
+ñuke
+challwa
+chawkantu
+karü
+kadü
+milla
+kurü
+küwü
+kolü
+kelü
+waka
+dewü
+ñarki
+kapüra
+llampüdken
+llalliñ
+fallke
+kechu
+pilun
+ligkelü
+aylla
+pura
+paine
+nawel
+awawe
+pel'
+mañke
+trulitruli
+longko
+wala
+trewa
+achawall
+meli
+chang
+regle
+kayu
+foro
+lien
+filu
+kewün'
+peyu
+rukü
+luku
+furi
+lipang
+koneku
+nge
+sañwe

util.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import os
+import re
+import unicodedata
def strip_accents(text: str) -> str:
    """Return ``text`` without its nonspacing combining marks.

    The text is decomposed with Unicode NFD normalization and every character
    whose Unicode category is 'Mn' is then left out of the result.
    """
    decomposed = unicodedata.normalize('NFD', text)
    kept = []
    for char in decomposed:
        if unicodedata.category(char) == 'Mn':
            continue
        kept.append(char)
    return ''.join(kept)
def load_raw_text(corpus_directory: str, file_names=None) -> str:
    """Loads the text files in a directory into one large string.

    Only regular files whose name ends in ".txt" are read. Files are read as
    UTF-8 and concatenated in sorted name order, each followed by a newline,
    so the result does not depend on the directory listing order or on the
    platform's default encoding.

    Args:
        corpus_directory: Directory that contains the text files.
        file_names: Optional file name, or iterable of file names, to restrict
            loading to. ``None`` (the default) loads every ".txt" file.

    Returns:
        The concatenated file contents ('' when no file qualifies).
    """
    if file_names is None:
        wanted = None
    elif isinstance(file_names, str):
        # A bare string is one file name, not an iterable of characters.
        wanted = {file_names}
    else:
        wanted = set(file_names)
    chunks = []
    for file_name in sorted(os.listdir(corpus_directory)):
        if wanted is not None and file_name not in wanted:
            continue
        file_path = os.path.join(corpus_directory, file_name)
        if os.path.isdir(file_path):
            continue
        # Make sure we only read text files (suffix check, so that names
        # such as "notes.txt.bak" are not picked up).
        if not file_name.endswith(".txt"):
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            chunks.append(file.read() + "\n")
    return "".join(chunks)
def load_single_raw_text_file(file_name):
    """Loads a single text file into one large string.

    The file is decoded as UTF-8 explicitly so that non-ASCII letters are
    read the same way regardless of the platform's default encoding.

    Args:
        file_name: Path of the file to read.

    Returns:
        The file contents followed by one extra newline.
    """
    with open(file_name, 'r', encoding='utf-8') as file:
        return file.read() + "\n"
# A word is a run of word characters and apostrophes. Inside a character
# class "|" is a literal pipe, not alternation, so it must not be listed.
word_regex = r"[\w']+"
def tokenize(text):
    """Return every word found in ``text``, in order of appearance.

    Words are the non-overlapping matches of ``word_regex``; anything else
    (whitespace, punctuation, pipes, ...) acts as a separator.
    """
    return re.findall(word_regex, text)
def preprocess(text):
    """Tokenizes and processes text which is already separated into words.

    Designed for English punctuation. Accents are stripped, the text is
    lower-cased and then split on any whitespace (spaces, tabs, newlines), so
    a line break between two words also separates them.

    A token is kept when it starts with a word character, an apostrophe, or
    one of the punctuation marks ``. , ? !``; only the start of the token is
    checked.

    Args:
        text: The raw text.

    Returns:
        The list of kept tokens, in their original order.
    """
    text = strip_accents(text)
    text = text.lower()
    tokens_filtered = []
    for token in text.split():
        # Skip tokens that start with a special character. "|" is not listed
        # in the classes: inside [...] it would be a literal pipe.
        if re.match(r"[\w']+|[.,?!]", token):
            tokens_filtered.append(token)
    return tokens_filtered
def pad(text: list, num_padding: int, end_punctuation=(".", "?", "!")):
    """Pads the given text, as a list of strings, with <s> tokens between sentences.

    ``num_padding`` "<s>" tokens are placed at the very beginning and again
    directly after every end-of-sentence token, i.e. in front of the sentence
    that follows it.

    Args:
        text: The tokens to pad.
        num_padding: How many "<s>" tokens to insert at each boundary.
        end_punctuation: Collection of tokens that end a sentence. Pass a
            different collection if your language uses other end punctuation.

    Returns:
        A new list with the padding inserted; ``text`` is not modified.
    """
    boundary = ["<s>"] * num_padding
    # Add initial padding to the first sentence
    padded_text = list(boundary)
    for word in text:
        padded_text.append(word)
        # Every time we see an end punctuation mark, add <s> tokens after it
        if word in end_punctuation:
            padded_text.extend(boundary)
    return padded_text