abdullahmubeen10 committed on
Commit
fe05f12
1 Parent(s): eca660b

Upload 5 files

.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
+ [theme]
+ base="light"
+ primaryColor="#29B4E8"
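
Note: this theme block only controls Streamlit's appearance (a light base theme and the blue primary color used for interactive widgets). Server settings such as the port are not set here; they are passed on the streamlit command line in the Dockerfile's ENTRYPOINT below.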
Demo.py ADDED
@@ -0,0 +1,109 @@
+ import streamlit as st
+ import sparknlp
+
+ from sparknlp.base import *
+ from sparknlp.annotator import *
+ from pyspark.ml import Pipeline
+
+ # Page configuration
+ st.set_page_config(
+     layout="wide",
+     initial_sidebar_state="auto"
+ )
+
+ # CSS for styling
+ st.markdown("""
+     <style>
+         .main-title {
+             font-size: 36px;
+             color: #4A90E2;
+             font-weight: bold;
+             text-align: center;
+         }
+         .section {
+             background-color: #f9f9f9;
+             padding: 10px;
+             border-radius: 10px;
+             margin-top: 10px;
+         }
+         .section p, .section ul {
+             color: #666666;
+         }
+     </style>
+ """, unsafe_allow_html=True)
+
+ @st.cache_resource
+ def init_spark():
+     return sparknlp.start()
+
+ @st.cache_resource
+ def create_pipeline(model):
+     documentAssembler = DocumentAssembler() \
+         .setInputCol("text") \
+         .setOutputCol("documents")
+
+     t5 = T5Transformer.pretrained(model) \
+         .setTask("cola:") \
+         .setInputCols(["documents"]) \
+         .setMaxOutputLength(200) \
+         .setOutputCol("corrections")
+
+     pipeline = Pipeline().setStages([documentAssembler, t5])
+     return pipeline
+
+ def fit_data(pipeline, data):
+     df = spark.createDataFrame([[data]]).toDF("text")
+     result = pipeline.fit(df).transform(df)
+     return result.select('corrections.result').collect()
+
+ # Sidebar content
+ model = st.sidebar.selectbox(
+     "Choose the pretrained model",
+     ['t5_base', 't5_small', 't5_large'],
+     help="For more info about the models visit: https://sparknlp.org/models"
+ )
+
+ # Set up the page layout
+ title = "Evaluate Sentence Grammar"
+ sub_title = "This demo uses a text-to-text model fine-tuned to evaluate grammatical errors when the task is set to 'cola:'"
+
+ st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
+ st.markdown(f'<div style="text-align: center; color: #666666;">{sub_title}</div>', unsafe_allow_html=True)
+
+ # Reference notebook link in sidebar
+ link = """
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb#scrollTo=QAZ3vOX_SW7B">
+     <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
+ </a>
+ """
+ st.sidebar.markdown('Reference notebook:')
+ st.sidebar.markdown(link, unsafe_allow_html=True)
+
+ # Define the examples
+ examples = [
+     "She don't knows nothing about what's happening in the office.",
+     "They was playing soccer yesterday when it start raining heavily.",
+     "This car are more faster than that one, but it costed less money.",
+     "I seen him go to the store, but he don't buy nothing from there.",
+     "We was going to the park but it start raining before we could leave."
+ ]
+
+ # Text selection and analysis
+ selected_text = st.selectbox("Select an example", examples)
+ custom_input = st.text_input("Try it with your own sentence!")
+
+ text_to_analyze = custom_input if custom_input else selected_text
+
+ st.write('Text to be evaluated:')
+ HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
+ st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)
+
+ # Initialize Spark and create pipeline
+ spark = init_spark()
+ pipeline = create_pipeline(model)
+ output = fit_data(pipeline, text_to_analyze)
+
+ # Display transformed sentence
+ st.write("Prediction:")
+ output_text = "".join(output[0][0])
+ st.markdown(f'<div class="scroll">{output_text}</div>', unsafe_allow_html=True)
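
The demo drives this pipeline through Streamlit, but the same stages can be sanity-checked from a plain Python session. Below is a minimal sketch (illustrative only, assuming the 't5_base' model from the sidebar options) that runs one sentence through Spark NLP's LightPipeline instead of building a DataFrame per request:

    import sparknlp
    from sparknlp.base import DocumentAssembler, LightPipeline
    from sparknlp.annotator import T5Transformer
    from pyspark.ml import Pipeline

    spark = sparknlp.start()

    # Same configuration as create_pipeline() in Demo.py, with the model fixed to 't5_base'
    document_assembler = DocumentAssembler() \
        .setInputCol("text") \
        .setOutputCol("documents")

    t5 = T5Transformer.pretrained("t5_base") \
        .setTask("cola:") \
        .setInputCols(["documents"]) \
        .setMaxOutputLength(200) \
        .setOutputCol("corrections")

    pipeline = Pipeline().setStages([document_assembler, t5])

    # The stages are pretrained, so fitting on an empty DataFrame just materializes the model
    empty_df = spark.createDataFrame([[""]]).toDF("text")
    model = pipeline.fit(empty_df)

    # LightPipeline annotates plain strings and returns a dict keyed by output column names
    light = LightPipeline(model)
    print(light.annotate("They was playing soccer yesterday.")["corrections"])
    # Expected to print something like: ['unacceptable']

The "corrections" key in the returned dictionary matches the setOutputCol value above, so the same lookup works for any output column name you configure.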
Dockerfile ADDED
@@ -0,0 +1,72 @@
+ # Download base image ubuntu 18.04
+ FROM ubuntu:18.04
+
+ # Set environment variables
+ ENV NB_USER jovyan
+ ENV NB_UID 1000
+ ENV HOME /home/${NB_USER}
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+
+ # Install required packages
+ RUN apt-get update && apt-get install -y \
+     tar \
+     wget \
+     bash \
+     rsync \
+     gcc \
+     libfreetype6-dev \
+     libhdf5-serial-dev \
+     libpng-dev \
+     libzmq3-dev \
+     python3 \
+     python3-dev \
+     python3-pip \
+     unzip \
+     pkg-config \
+     software-properties-common \
+     graphviz \
+     openjdk-8-jdk \
+     ant \
+     ca-certificates-java \
+     && apt-get clean \
+     && update-ca-certificates -f
+
+ # Install Python 3.8 and pip
+ RUN add-apt-repository ppa:deadsnakes/ppa \
+     && apt-get update \
+     && apt-get install -y python3.8 python3-pip \
+     && apt-get clean
+
+ # Set up JAVA_HOME
+ RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
+     && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile
+ # Create a new user named "jovyan" with user ID 1000
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
+
+ # Switch to the "jovyan" user
+ USER ${NB_USER}
+
+ # Set home and path variables for the user
+ ENV HOME=/home/${NB_USER} \
+     PATH=/home/${NB_USER}/.local/bin:$PATH
+
+ # Set up PySpark to use Python 3.8 for both driver and workers
+ ENV PYSPARK_PYTHON=/usr/bin/python3.8
+ ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8
+
+ # Set the working directory to the user's home directory
+ WORKDIR ${HOME}
+
+ # Upgrade pip and install Python dependencies
+ RUN python3.8 -m pip install --upgrade pip
+ COPY requirements.txt /tmp/requirements.txt
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
+
+ # Copy the application code into the container at /home/jovyan
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
+
+ # Expose port for Streamlit
+ EXPOSE 7860
+
+ # Define the entry point for the container
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
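
Assuming Docker is available, a typical local workflow is to build the image with docker build -t grammar-demo . and start it with docker run -p 7860:7860 grammar-demo, after which the Streamlit app is reachable on port 7860 as set by the EXPOSE and ENTRYPOINT lines above. The image name grammar-demo is only an example.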
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,167 @@
+ import streamlit as st
+
+ # Custom CSS for better styling
+ st.markdown("""
+     <style>
+         .main-title {
+             font-size: 36px;
+             color: #4A90E2;
+             font-weight: bold;
+             text-align: center;
+         }
+         .sub-title {
+             font-size: 24px;
+             color: #4A90E2;
+             margin-top: 20px;
+         }
+         .section {
+             background-color: #f9f9f9;
+             padding: 15px;
+             border-radius: 10px;
+             margin-top: 20px;
+         }
+         .section h2 {
+             font-size: 22px;
+             color: #4A90E2;
+         }
+         .section p, .section ul {
+             color: #666666;
+         }
+         .link {
+             color: #4A90E2;
+             text-decoration: none;
+         }
+     </style>
+ """, unsafe_allow_html=True)
+
+ # Title
+ st.markdown('<div class="main-title">Evaluate Sentence Grammar</div>', unsafe_allow_html=True)
+
+ # Introduction Section
+ st.markdown("""
+ <div class="section">
+     <p>Evaluating sentence grammar is crucial for maintaining the clarity and accuracy of written communication. Whether you're reviewing content for publication, editing academic work, or checking everyday writing, ensuring grammatical correctness is key.</p>
+     <p>This page showcases the implementation of a grammar evaluation pipeline using advanced NLP models. We leverage the T5 Transformer model, fine-tuned for assessing sentence grammar, to evaluate and identify potential errors in sentences.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # T5 Transformer Overview
+ st.markdown('<div class="sub-title">Understanding the T5 Transformer for Grammar Evaluation</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>The T5 (Text-to-Text Transfer Transformer) model, developed by Google, is a powerful tool for various NLP tasks, including grammar evaluation. When configured with the appropriate task, T5 can assess sentences for grammatical correctness, helping users identify and correct errors.</p>
+     <p>This capability is particularly useful in proofreading tools, automated editing software, and educational applications, where precise grammar is essential.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Performance Section
+ st.markdown('<div class="sub-title">Performance and Use Cases</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>The T5 model exhibits strong performance in grammar evaluation tasks, providing accurate and contextually relevant assessments. This makes it a valuable resource for anyone looking to improve the quality of written content.</p>
+     <p>Use cases include academic proofreading, professional editing, and everyday writing checks, where maintaining grammatical integrity is of utmost importance.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Implementation Section
+ st.markdown('<div class="sub-title">Implementing Grammar Evaluation</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>The following example demonstrates how to implement a grammar evaluation pipeline using Spark NLP. The pipeline includes a document assembler and the T5 model configured for evaluating sentence grammar.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ st.code('''
+ import sparknlp
+ from sparknlp.base import *
+ from sparknlp.annotator import *
+ from pyspark.ml import Pipeline
+
+ # Initialize Spark NLP
+ spark = sparknlp.start()
+
+ # Define the pipeline stages
+ documentAssembler = DocumentAssembler() \\
+     .setInputCol("text") \\
+     .setOutputCol("documents")
+
+ t5 = T5Transformer.pretrained('t5_base') \\
+     .setTask("cola:") \\
+     .setInputCols(["documents"]) \\
+     .setMaxOutputLength(200) \\
+     .setOutputCol("prediction")
+
+ pipeline = Pipeline().setStages([documentAssembler, t5])
+
+ # Input data example
+ data = spark.createDataFrame([["She don't knows nothing about what's happening in the office."]]).toDF("text")
+
+ # Apply the pipeline for grammar evaluation
+ result = pipeline.fit(data).transform(data)
+ result.select("prediction.result").show(truncate=False)
+ ''', language='python')
+
+ # Example Output
+ st.text("""
+ +--------------+
+ |result        |
+ +--------------+
+ |[unacceptable]|
+ +--------------+
+ """)
+
+ # Model Info Section
+ st.markdown('<div class="sub-title">Choosing the Right T5 Model for Grammar Evaluation</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>For evaluating sentence grammar, this example uses the pretrained "t5_base" model with the task set to "cola:". Under this task prefix, the model labels each English sentence as "acceptable" or "unacceptable".</p>
+     <p>Explore other T5 models tailored for different NLP tasks on the <a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Models Hub</a> to find the best fit for your specific needs.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # References Section
+ st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <ul>
+         <li><a class="link" href="https://ai.googleblog.com/2020/02/exploring-transfer-learning-with-t5.html" target="_blank">Google AI Blog</a>: Exploring Transfer Learning with T5</li>
+         <li><a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Model Hub</a>: Explore T5 models</li>
+         <li><a class="link" href="https://github.com/google-research/text-to-text-transfer-transformer" target="_blank">GitHub</a>: T5 Transformer repository</li>
+         <li><a class="link" href="https://arxiv.org/abs/1910.10683" target="_blank">T5 Paper</a>: Detailed insights from the developers</li>
+     </ul>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Community & Support Section
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <ul>
+         <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
+         <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
+         <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
+         <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
+         <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
+     </ul>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Quick Links Section
+ st.markdown('<div class="sub-title">Quick Links</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <ul>
+         <li><a class="link" href="https://sparknlp.org/docs/en/quickstart" target="_blank">Getting Started</a></li>
+         <li><a class="link" href="https://nlp.johnsnowlabs.com/models" target="_blank">Pretrained Models</a></li>
+         <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples/python/annotation/text/english" target="_blank">Example Notebooks</a></li>
+         <li><a class="link" href="https://sparknlp.org/docs/en/install" target="_blank">Installation Guide</a></li>
+     </ul>
+ </div>
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit
+ st-annotated-text
+ streamlit-tags
+ pandas
+ numpy
+ spark-nlp
+ pyspark
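
These dependencies are unpinned, so pip resolves the latest compatible releases of spark-nlp and pyspark. For a run outside Docker, a minimal setup, assuming Java 8 and Python 3.8 as in the Dockerfile, is pip install -r requirements.txt followed by streamlit run Demo.py.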