UnarineLeo
committed on
Commit
•
e2a34c5
1
Parent(s):
dd1af40
Update app.py
Browse files
app.py
CHANGED
@@ -8,11 +8,11 @@ st.set_page_config(layout="wide")
|
|
8 |
def fill_mask(sentences):
|
9 |
results = {}
|
10 |
warnings = []
|
11 |
-
for language, sentence in sentences.items():
|
12 |
if "<mask>" in sentence:
|
13 |
masked_sentence = sentence.replace('<mask>', unmasker.tokenizer.mask_token)
|
14 |
unmasked = unmasker(masked_sentence)
|
15 |
-
results[
|
16 |
else:
|
17 |
warnings.append(f"Warning: No <mask> token found in sentence: {sentence}")
|
18 |
return results, warnings
|
@@ -31,7 +31,7 @@ if 'text_input' not in st.session_state:
|
|
31 |
if 'warnings' not in st.session_state:
|
32 |
st.session_state['warnings'] = []
|
33 |
|
34 |
-
language_options = ['Choose language','Zulu', 'Tshivenda', 'Sepedi', 'Tswana', 'Tsonga']
|
35 |
|
36 |
with col1:
|
37 |
with st.container():
|
@@ -44,22 +44,22 @@ with col1:
|
|
44 |
with input1:
|
45 |
language = st.selectbox(f"Select language for sentence {i+1}:", language_options, key=f'language_{i}')
|
46 |
with input2:
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
input_sentences[language.lower()] = sentence
|
51 |
|
52 |
button1, button2, _ = st.columns([2, 2, 4])
|
53 |
with button1:
|
54 |
if st.button("Test Example"):
|
55 |
-
|
56 |
-
'
|
57 |
-
'
|
58 |
-
'
|
59 |
-
'
|
60 |
-
'
|
61 |
}
|
62 |
-
input_sentences =
|
63 |
result, warnings = fill_mask(input_sentences)
|
64 |
|
65 |
with button2:
|
@@ -83,8 +83,8 @@ with col1:
|
|
83 |
with col2:
|
84 |
with st.container():
|
85 |
st.markdown("Output :bar_chart:")
|
86 |
-
if input_sentences
|
87 |
-
for language, sentence in input_sentences.items():
|
88 |
masked_sentence = sentence.replace('<mask>', unmasker.tokenizer.mask_token)
|
89 |
predictions = unmasker(masked_sentence)
|
90 |
|
@@ -105,8 +105,8 @@ with col2:
|
|
105 |
|
106 |
if 'predictions' in locals():
|
107 |
if result:
|
108 |
-
for language, language_predictions in result.items():
|
109 |
-
original_sentence = input_sentences[
|
110 |
predicted_sentence = replace_mask(original_sentence, language_predictions[0]['token_str'])
|
111 |
st.write(f"{language}: {predicted_sentence}\n")
|
112 |
|
|
|
8 |
def fill_mask(sentences):
    """Run the mask-filling model over every sentence that contains ``<mask>``.

    Args:
        sentences: Mapping of a unique key to a ``(language, sentence)``
            pair. The key is reused unchanged as the key of the result.

    Returns:
        A ``(results, warnings)`` tuple. ``results`` maps each key whose
        sentence contained ``<mask>`` to ``(language, unmasked)``, where
        ``unmasked`` is whatever ``unmasker`` returned for that sentence.
        ``warnings`` is a list with one message per sentence that had no
        ``<mask>`` placeholder; such sentences are never sent to the model.
    """
    results = {}
    warnings = []
    for key, (language, sentence) in sentences.items():
        if "<mask>" in sentence:
            # Swap the generic placeholder for the tokenizer's own mask token.
            # NOTE(review): `unmasker` is a module-level object defined outside
            # this block — presumably a fill-mask pipeline; confirm.
            masked_sentence = sentence.replace('<mask>', unmasker.tokenizer.mask_token)
            unmasked = unmasker(masked_sentence)
            results[key] = (language, unmasked)
        else:
            warnings.append(f"Warning: No <mask> token found in sentence: {sentence}")
    return results, warnings
|
|
|
31 |
if 'warnings' not in st.session_state:
|
32 |
st.session_state['warnings'] = []
|
33 |
|
34 |
+
language_options = ['Choose language', 'Zulu', 'Tshivenda', 'Sepedi', 'Tswana', 'Tsonga']
|
35 |
|
36 |
with col1:
|
37 |
with st.container():
|
|
|
44 |
with input1:
|
45 |
language = st.selectbox(f"Select language for sentence {i+1}:", language_options, key=f'language_{i}')
|
46 |
with input2:
|
47 |
+
sentence = st.text_input(f"Enter sentence for {language} (with <mask>):", key=f'text_input_{i}')
|
48 |
+
if sentence:
|
49 |
+
# Create a unique key for each sentence
|
50 |
+
input_sentences[f'{language.lower()}_{i+1}'] = (language.lower(), sentence)
|
51 |
|
52 |
button1, button2, _ = st.columns([2, 2, 4])
|
53 |
with button1:
|
54 |
if st.button("Test Example"):
|
55 |
+
sample_sentences = {
|
56 |
+
'zulu_1': ('zulu', "Le ndoda ithi izo <mask> ukudla."),
|
57 |
+
'tshivenda_2': ('tshivenda', "Vhana vhane vha kha ḓi bva u bebwa vha kha khombo ya u <mask> nga Listeriosis."),
|
58 |
+
'tshivenda_3': ('tshivenda', "Rabulasi wa <mask> u khou bvelela nga u lima"),
|
59 |
+
'tswana_4': ('tswana', "Monna o <mask> tsamaya."),
|
60 |
+
'tsonga_5': ('tsonga', "N'wana wa xisati u <mask> ku tsaka.")
|
61 |
}
|
62 |
+
input_sentences = sample_sentences
|
63 |
result, warnings = fill_mask(input_sentences)
|
64 |
|
65 |
with button2:
|
|
|
83 |
with col2:
|
84 |
with st.container():
|
85 |
st.markdown("Output :bar_chart:")
|
86 |
+
if input_sentences:
|
87 |
+
for key, (language, sentence) in input_sentences.items():
|
88 |
masked_sentence = sentence.replace('<mask>', unmasker.tokenizer.mask_token)
|
89 |
predictions = unmasker(masked_sentence)
|
90 |
|
|
|
105 |
|
106 |
if 'predictions' in locals():
|
107 |
if result:
|
108 |
+
for key, (language, language_predictions) in result.items():
|
109 |
+
original_sentence = input_sentences[key][1]
|
110 |
predicted_sentence = replace_mask(original_sentence, language_predictions[0]['token_str'])
|
111 |
st.write(f"{language}: {predicted_sentence}\n")
|
112 |
|