fill-mask-zabantu-xlm-roberta

Running

App Files Files Community

UnarineLeo committed on 22 days ago

Commit

d104ff1

•

1 Parent(s): f5f8f9a

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -14

app.py CHANGED Viewed

@@ -1,45 +1,67 @@
 import streamlit as st
 from transformers import pipeline
 unmasker = pipeline('fill-mask', model='dsfsi/zabantu-bantu-250m')
 sample_sentences = {
-    'Zulu': "Le ndoda ithi izo____ ukudla.",
-    'Tshivenda': "Mufana uyo____ vhukuma.",
-    'Sepedi': "Mosadi o ____ pheka.",
-    'Tswana': "Monna o ____ tsamaya.",
-    'Tsonga': "N'wana wa xisati u ____ ku tsaka."
 }
 def fill_mask_for_languages(sentences):
     results = {}
     for language, sentence in sentences.items():
         masked_sentence = sentence.replace('____', unmasker.tokenizer.mask_token)
         unmasked = unmasker(masked_sentence)
         results[language] = unmasked
     return results
 st.title("Fill Mask for Multiple Languages | Zabantu-Bantu-250m")
 st.write("This app predicts the missing word for sentences in Zulu, Tshivenda, Sepedi, Tswana, and Tsonga using a Zabantu BERT model.")
 st.write("### Sample sentences:")
 for language, sentence in sample_sentences.items():
     st.write(f"**{language}**: {sentence}")
 if st.button("Submit"):
-    result = fill_mask_for_languages(sample_sentences)
-    if result:
-        st.write("### Predictions:")
-        for language, predictions in result.items():
-            original_sentence = sample_sentences[language]
-            predicted_sentence = predictions[0]['sequence']
-            st.write(f"Original sentence ({language}): {original_sentence}")
-            st.write(f"Top prediction for the masked token: {predicted_sentence}\n")
-            st.write("=" * 80)
 css = """
 <style>
 footer {display:none !important}

 import streamlit as st
 from transformers import pipeline
+# Initialize the pipeline for the fill-mask task
 unmasker = pipeline('fill-mask', model='dsfsi/zabantu-bantu-250m')
+# Sample sentences for different languages with placeholders for the masked word
 sample_sentences = {
+    'Zulu': "Le ndoda ithi izo____ ukudla.",  # Masked word for Zulu
+    'Tshivenda': "Mufana uyo____ vhukuma.",  # Masked word for Tshivenda
+    'Sepedi': "Mosadi o ____ pheka.",  # Masked word for Sepedi
+    'Tswana': "Monna o ____ tsamaya.",  # Masked word for Tswana
+    'Tsonga': "N'wana wa xisati u ____ ku tsaka."  # Masked word for Tsonga
 }
+# Function to run fill-mask on each sentence after replacing the '____' placeholder with the model's mask token
 def fill_mask_for_languages(sentences):
     results = {}
     for language, sentence in sentences.items():
+        # Replace the '____' placeholder with the model's mask token
         masked_sentence = sentence.replace('____', unmasker.tokenizer.mask_token)
+        # Get predictions for the masked sentence
         unmasked = unmasker(masked_sentence)
+        # Store the result for each language
         results[language] = unmasked
     return results
+# Streamlit interface
 st.title("Fill Mask for Multiple Languages | Zabantu-Bantu-250m")
 st.write("This app predicts the missing word for sentences in Zulu, Tshivenda, Sepedi, Tswana, and Tsonga using a Zabantu BERT model.")
+# Display the original sample sentences
 st.write("### Sample sentences:")
 for language, sentence in sample_sentences.items():
     st.write(f"**{language}**: {sentence}")
+# User input for custom sentences
+user_sentence = st.text_input("Enter your own sentence with a masked word (use '____'):", "Enter sentence here...")
+# Add a submit button
 if st.button("Submit"):
+    # Prepare user input for prediction
+    user_masked_sentence = user_sentence.replace('____', unmasker.tokenizer.mask_token)
+    # Get predictions for the user input sentence
+    user_predictions = unmasker(user_masked_sentence)
+    # Display results for user input
+    st.write("### Your Input:")
+    st.write(f"Original sentence: {user_sentence}")
+    st.write(f"Top prediction for the masked token: {user_predictions[0]['sequence']}")
+    # Display results for sample sentences
+    st.write("### Predictions for Sample Sentences:")
+    for language, predictions in fill_mask_for_languages(sample_sentences).items():
+        original_sentence = sample_sentences[language]
+        predicted_sentence = predictions[0]['sequence']
+        st.write(f"Original sentence ({language}): {original_sentence}")
+        st.write(f"Top prediction for the masked token: {predicted_sentence}\n")
+        st.write("=" * 80)
+# Custom CSS styling for Streamlit elements
 css = """
 <style>
 footer {display:none !important}