abdullahmubeen10 committed
Commit: fe05f12
Parent(s): eca660b
Upload 5 files
Files changed:
- .streamlit/config.toml (+3, -0)
- Demo.py (+109, -0)
- Dockerfile (+72, -0)
- pages/Workflow & Model Overview.py (+167, -0)
- requirements.txt (+7, -0)
.streamlit/config.toml
ADDED
@@ -0,0 +1,3 @@
[theme]
base="light"
primaryColor="#29B4E8"
Demo.py
ADDED
@@ -0,0 +1,109 @@
import streamlit as st
import sparknlp

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Page configuration
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# CSS for styling
st.markdown("""
    <style>
        .main-title {
            font-size: 36px;
            color: #4A90E2;
            font-weight: bold;
            text-align: center;
        }
        .section {
            background-color: #f9f9f9;
            padding: 10px;
            border-radius: 10px;
            margin-top: 10px;
        }
        .section p, .section ul {
            color: #666666;
        }
    </style>
""", unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    return sparknlp.start()

@st.cache_resource
def create_pipeline(model):
    documentAssembler = DocumentAssembler() \
        .setInputCol("text") \
        .setOutputCol("documents")

    t5 = T5Transformer.pretrained(model) \
        .setTask("cola:") \
        .setInputCols(["documents"]) \
        .setMaxOutputLength(200) \
        .setOutputCol("corrections")

    pipeline = Pipeline().setStages([documentAssembler, t5])
    return pipeline

def fit_data(pipeline, data):
    df = spark.createDataFrame([[data]]).toDF("text")
    result = pipeline.fit(df).transform(df)
    return result.select('corrections.result').collect()

# Sidebar content
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ['t5_base', 't5_small', 't5_large'],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# Set up the page layout
title = "Evaluate Sentence Grammar"
sub_title = "This demo uses a text-to-text model fine-tuned to evaluate grammatical errors when the task is set to 'cola:'"

st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
st.markdown(f'<div style="text-align: center; color: #666666;">{sub_title}</div>', unsafe_allow_html=True)

# Reference notebook link in sidebar
link = """
<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb#scrollTo=QAZ3vOX_SW7B">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# Define the examples
examples = [
    "She don't knows nothing about what's happening in the office.",
    "They was playing soccer yesterday when it start raining heavily.",
    "This car are more faster than that one, but it costed less money.",
    "I seen him go to the store, but he don't buy nothing from there.",
    "We was going to the park but it start raining before we could leave."
]

# Text selection and analysis
selected_text = st.selectbox("Select an example", examples)
custom_input = st.text_input("Try it with your own sentence!")

text_to_analyze = custom_input if custom_input else selected_text

st.write('Text to be evaluated:')
HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, text_to_analyze)

# Display transformed sentence
st.write("Prediction:")
output_text = "".join(output[0][0])
st.markdown(f'<div class="scroll">{output_text}</div>', unsafe_allow_html=True)
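Editor's note: for quick testing of the same pipeline outside Streamlit, Spark NLP's LightPipeline can annotate single strings without creating a DataFrame per request. Below is a minimal sketch, not part of the committed demo, assuming the "t5_base" model and the same "cola:" task used in Demo.py:

import sparknlp
from sparknlp.base import DocumentAssembler, LightPipeline
from sparknlp.annotator import T5Transformer
from pyspark.ml import Pipeline

spark = sparknlp.start()

# Same stages as create_pipeline() in Demo.py
document_assembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("documents")

t5 = T5Transformer.pretrained("t5_base") \
    .setTask("cola:") \
    .setInputCols(["documents"]) \
    .setMaxOutputLength(200) \
    .setOutputCol("corrections")

pipeline = Pipeline().setStages([document_assembler, t5])

# No trainable stages, so fitting on an empty DataFrame is enough
empty_df = spark.createDataFrame([[""]]).toDF("text")
light = LightPipeline(pipeline.fit(empty_df))

# annotate() returns a dict keyed by output column; for the CoLA task the
# result is expected to be "acceptable" or "unacceptable"
print(light.annotate("They was playing soccer yesterday.")["corrections"])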
Dockerfile
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Download base image ubuntu 18.04
|
2 |
+
FROM ubuntu:18.04
|
3 |
+
|
4 |
+
# Set environment variables
|
5 |
+
ENV NB_USER jovyan
|
6 |
+
ENV NB_UID 1000
|
7 |
+
ENV HOME /home/${NB_USER}
|
8 |
+
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
9 |
+
|
10 |
+
# Install required packages
|
11 |
+
RUN apt-get update && apt-get install -y \
|
12 |
+
tar \
|
13 |
+
wget \
|
14 |
+
bash \
|
15 |
+
rsync \
|
16 |
+
gcc \
|
17 |
+
libfreetype6-dev \
|
18 |
+
libhdf5-serial-dev \
|
19 |
+
libpng-dev \
|
20 |
+
libzmq3-dev \
|
21 |
+
python3 \
|
22 |
+
python3-dev \
|
23 |
+
python3-pip \
|
24 |
+
unzip \
|
25 |
+
pkg-config \
|
26 |
+
software-properties-common \
|
27 |
+
graphviz \
|
28 |
+
openjdk-8-jdk \
|
29 |
+
ant \
|
30 |
+
ca-certificates-java \
|
31 |
+
&& apt-get clean \
|
32 |
+
&& update-ca-certificates -f
|
33 |
+
|
34 |
+
# Install Python 3.8 and pip
|
35 |
+
RUN add-apt-repository ppa:deadsnakes/ppa \
|
36 |
+
&& apt-get update \
|
37 |
+
&& apt-get install -y python3.8 python3-pip \
|
38 |
+
&& apt-get clean
|
39 |
+
|
40 |
+
# Set up JAVA_HOME
|
41 |
+
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
|
42 |
+
&& echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile
|
43 |
+
# Create a new user named "jovyan" with user ID 1000
|
44 |
+
RUN useradd -m -u ${NB_UID} ${NB_USER}
|
45 |
+
|
46 |
+
# Switch to the "jovyan" user
|
47 |
+
USER ${NB_USER}
|
48 |
+
|
49 |
+
# Set home and path variables for the user
|
50 |
+
ENV HOME=/home/${NB_USER} \
|
51 |
+
PATH=/home/${NB_USER}/.local/bin:$PATH
|
52 |
+
|
53 |
+
# Set up PySpark to use Python 3.8 for both driver and workers
|
54 |
+
ENV PYSPARK_PYTHON=/usr/bin/python3.8
|
55 |
+
ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8
|
56 |
+
|
57 |
+
# Set the working directory to the user's home directory
|
58 |
+
WORKDIR ${HOME}
|
59 |
+
|
60 |
+
# Upgrade pip and install Python dependencies
|
61 |
+
RUN python3.8 -m pip install --upgrade pip
|
62 |
+
COPY requirements.txt /tmp/requirements.txt
|
63 |
+
RUN python3.8 -m pip install -r /tmp/requirements.txt
|
64 |
+
|
65 |
+
# Copy the application code into the container at /home/jovyan
|
66 |
+
COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
|
67 |
+
|
68 |
+
# Expose port for Streamlit
|
69 |
+
EXPOSE 7860
|
70 |
+
|
71 |
+
# Define the entry point for the container
|
72 |
+
ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
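Editor's note: assuming the image is tagged grammar-demo (a hypothetical name), it would typically be built with `docker build -t grammar-demo .` and started with `docker run -p 7860:7860 grammar-demo`, after which the Streamlit app should be reachable on port 7860.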
pages/Workflow & Model Overview.py
ADDED
@@ -0,0 +1,167 @@
import streamlit as st

# Custom CSS for better styling
st.markdown("""
    <style>
        .main-title {
            font-size: 36px;
            color: #4A90E2;
            font-weight: bold;
            text-align: center;
        }
        .sub-title {
            font-size: 24px;
            color: #4A90E2;
            margin-top: 20px;
        }
        .section {
            background-color: #f9f9f9;
            padding: 15px;
            border-radius: 10px;
            margin-top: 20px;
        }
        .section h2 {
            font-size: 22px;
            color: #4A90E2;
        }
        .section p, .section ul {
            color: #666666;
        }
        .link {
            color: #4A90E2;
            text-decoration: none;
        }
    </style>
""", unsafe_allow_html=True)

# Title
st.markdown('<div class="main-title">Evaluate Sentence Grammar</div>', unsafe_allow_html=True)

# Introduction Section
st.markdown("""
<div class="section">
    <p>Evaluating sentence grammar is crucial for maintaining the clarity and accuracy of written communication. Whether you're reviewing content for publication, editing academic work, or checking everyday writing, ensuring grammatical correctness is key.</p>
    <p>This page showcases the implementation of a grammar evaluation pipeline using advanced NLP models. We leverage the T5 Transformer model, fine-tuned for assessing sentence grammar, to evaluate and identify potential errors in sentences.</p>
</div>
""", unsafe_allow_html=True)

# T5 Transformer Overview
st.markdown('<div class="sub-title">Understanding the T5 Transformer for Grammar Evaluation</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
    <p>The T5 (Text-To-Text Transfer Transformer) model, developed by Google, is a powerful tool for various NLP tasks, including grammar evaluation. When configured with the appropriate task, T5 can assess sentences for grammatical correctness, helping users identify and correct errors.</p>
    <p>This capability is particularly useful in proofreading tools, automated editing software, and educational applications, where precise grammar is essential.</p>
</div>
""", unsafe_allow_html=True)

# Performance Section
st.markdown('<div class="sub-title">Performance and Use Cases</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
    <p>The T5 model exhibits strong performance in grammar evaluation tasks, providing accurate and contextually relevant assessments. This makes it a valuable resource for anyone looking to improve the quality of written content.</p>
    <p>Use cases include academic proofreading, professional editing, and everyday writing checks, where maintaining grammatical integrity is of utmost importance.</p>
</div>
""", unsafe_allow_html=True)

# Implementation Section
st.markdown('<div class="sub-title">Implementing Grammar Evaluation</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
    <p>The following example demonstrates how to implement a grammar evaluation pipeline using Spark NLP. The pipeline includes a document assembler and the T5 model configured for evaluating sentence grammar.</p>
</div>
""", unsafe_allow_html=True)

st.code('''
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Initialize Spark NLP
spark = sparknlp.start()

# Define the pipeline stages
documentAssembler = DocumentAssembler() \\
    .setInputCol("text") \\
    .setOutputCol("documents")

t5 = T5Transformer.pretrained('t5_base') \\
    .setTask("cola:") \\
    .setInputCols(["documents"]) \\
    .setMaxOutputLength(200) \\
    .setOutputCol("prediction")

pipeline = Pipeline().setStages([documentAssembler, t5])

# Input data example
data = spark.createDataFrame([["She don't knows nothing about what's happening in the office."]]).toDF("text")

# Apply the pipeline for grammar evaluation
result = pipeline.fit(data).transform(data)
result.select("prediction.result").show(truncate=False)
''', language='python')

# Example Output
st.text("""
+--------------------+
|result              |
+--------------------+
|unacceptable        |
+--------------------+
""")

# Model Info Section
st.markdown('<div class="sub-title">Choosing the Right T5 Model for Grammar Evaluation</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
    <p>For evaluating sentence grammar, we use the "t5_base" model with the task set to "cola:". With this task prefix, the model classifies English sentences as grammatically acceptable or unacceptable.</p>
    <p>Explore other T5 models tailored for different NLP tasks on the <a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Models Hub</a> to find the best fit for your specific needs.</p>
</div>
""", unsafe_allow_html=True)

# References Section
st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
    <ul>
        <li><a class="link" href="https://ai.googleblog.com/2020/02/exploring-transfer-learning-with-t5.html" target="_blank">Google AI Blog</a>: Exploring Transfer Learning with T5</li>
        <li><a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Model Hub</a>: Explore T5 models</li>
        <li><a class="link" href="https://github.com/google-research/text-to-text-transfer-transformer" target="_blank">GitHub</a>: T5 Transformer repository</li>
        <li><a class="link" href="https://arxiv.org/abs/1910.10683" target="_blank">T5 Paper</a>: Detailed insights from the developers</li>
    </ul>
</div>
""", unsafe_allow_html=True)

# Community & Support Section
st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
    <ul>
        <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
        <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
        <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
        <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
        <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
    </ul>
</div>
""", unsafe_allow_html=True)

# Quick Links Section
st.markdown('<div class="sub-title">Quick Links</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
    <ul>
        <li><a class="link" href="https://sparknlp.org/docs/en/quickstart" target="_blank">Getting Started</a></li>
        <li><a class="link" href="https://nlp.johnsnowlabs.com/models" target="_blank">Pretrained Models</a></li>
        <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples/python/annotation/text/english" target="_blank">Example Notebooks</a></li>
        <li><a class="link" href="https://sparknlp.org/docs/en/install" target="_blank">Installation Guide</a></li>
    </ul>
</div>
""", unsafe_allow_html=True)
requirements.txt
ADDED
@@ -0,0 +1,7 @@
streamlit
st-annotated-text
streamlit-tags
pandas
numpy
spark-nlp
pyspark