import streamlit as st
import sparknlp

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Page configuration
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# CSS for styling
st.markdown("""
""", unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    # Start (or reuse) the Spark NLP session; cached so it is created only once
    return sparknlp.start()

@st.cache_resource
def create_pipeline(model):
    # Convert raw input text into Spark NLP document annotations
    documentAssembler = DocumentAssembler() \
        .setInputCol("text") \
        .setOutputCol("documents")

    # T5 with the "cola:" task judges the grammatical acceptability of the sentence
    t5 = T5Transformer.pretrained(model) \
        .setTask("cola:") \
        .setInputCols(["documents"]) \
        .setMaxOutputLength(200) \
        .setOutputCol("corrections")

    pipeline = Pipeline().setStages([documentAssembler, t5])
    return pipeline

def fit_data(pipeline, data):
    # Wrap the input sentence in a single-row DataFrame and run the pipeline on it
    df = spark.createDataFrame([[data]]).toDF("text")
    result = pipeline.fit(df).transform(df)
    return result.select('corrections.result').collect()

# Sidebar content
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ['t5_base', 't5_small', 't5_large'],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# Set up the page layout
title = "Evaluate Sentence Grammar"
sub_title = "This demo uses a text-to-text (T5) model to evaluate the grammatical acceptability of a sentence when the task is set to 'cola:'."

st.markdown(f'<div>{title}</div>', unsafe_allow_html=True)
st.markdown(f'<div>{sub_title}</div>', unsafe_allow_html=True)

# Reference notebook link in sidebar
link = """Open In Colab"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# Define the examples
examples = [
    "She don't knows nothing about what's happening in the office.",
    "They was playing soccer yesterday when it start raining heavily.",
    "This car are more faster than that one, but it costed less money.",
    "I seen him go to the store, but he don't buy nothing from there.",
    "We was going to the park but it start raining before we could leave."
]

# Text selection and analysis
selected_text = st.selectbox("Select an example", examples)
custom_input = st.text_input("Try it with your own sentence!")
text_to_analyze = custom_input if custom_input else selected_text

st.write('Text to be evaluated:')
HTML_WRAPPER = """<div>{}</div>"""
st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, text_to_analyze)

# Display the model's prediction
st.write("Prediction:")
output_text = "".join(output[0][0])
st.markdown(f'<div>{output_text}</div>', unsafe_allow_html=True)
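
# Usage sketch (the filename app.py is illustrative, not part of the original script):
#   streamlit run app.py
# With the task set to "cola:", the T5 model returns an acceptability judgment for the
# selected or typed sentence (e.g. "acceptable" or "unacceptable") rather than a
# corrected version of it, and the app renders that judgment under "Prediction:".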