bainskarman committed on
Commit
f72805a
·
verified ·
1 Parent(s): 3d9b27d

Upload 7 files

Browse files
Files changed (8) hide show
  1. .gitattributes +1 -0
  2. app.py +79 -0
  3. best_model.keras +3 -0
  4. label_encoder.pkl +3 -0
  5. max_length.pkl +3 -0
  6. requirements.txt +163 -0
  7. stop_words.pkl +3 -0
  8. tokenizer.pkl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ best_model.keras filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py — Streamlit text classifier. Loads a trained Keras model together
# with the preprocessing artifacts pickled at training time (tokenizer,
# label encoder, padding length, stop words), then serves predictions
# through a minimal UI.
import streamlit as st
import pickle
import numpy as np
import os
from tensorflow.keras.models import load_model
import pandas as pd
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import matplotlib.pyplot as plt
import seaborn as sns

# Trained classification model (Keras v3 `.keras` format).
model = load_model('best_model.keras')

# Load the tokenizer fitted on the training corpus (word -> integer id).
with open('tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# Load the label encoder (class index -> label string such as "a|b|c").
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Load max_length: the fixed sequence length the model was trained with.
with open('max_length.pkl', 'rb') as f:
    max_length = pickle.load(f)

# Load stop words removed during preprocessing.
# NOTE(review): pickle.load on these artifact files executes arbitrary code
# if the files are ever replaced by untrusted content — keep them trusted.
with open('stop_words.pkl', 'rb') as f:
    stop_words = pickle.load(f)

lemmatizer = WordNetLemmatizer()
32
def preprocess_text(text):
    """Normalize raw input text for the model.

    Lowercases, strips every character except a-z and whitespace, removes
    stop words, and lemmatizes whatever remains.

    Args:
        text: Arbitrary input; coerced to ``str`` first.

    Returns:
        A single space-joined string of cleaned, lemmatized words
        (may be empty if nothing survives filtering).
    """
    text = str(text).lower()
    # Keep only lowercase letters and whitespace; digits and punctuation
    # are deleted outright (not replaced by spaces).
    text = re.sub(r'[^a-z\s]', '', text)
    # Single pass: drop stop words, lemmatize the rest.
    # (The original also bound an unused local `st_words = stop_words`;
    # removed as dead code.)
    words = [
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    ]
    return ' '.join(words)
42
def classify_text(text):
    """Predict the three-level category for *text*.

    Runs preprocessing, tokenizes, pads/truncates to the training sequence
    length, and decodes the model's top prediction. Labels produced by the
    encoder are expected to be ``"main|sub|lowest"`` strings.

    Args:
        text: Raw user text.

    Returns:
        Tuple ``(main_category, sub_category, lowest_category)``; all three
        are ``"Unknown"`` when the predicted label does not split into
        exactly three parts.
    """
    text = preprocess_text(text)
    seq = tokenizer.texts_to_sequences([text])
    # Truncate before padding: np.pad raises ValueError on a negative pad
    # width, so any input longer than max_length tokens would otherwise
    # crash the app.
    tokens = seq[0][:max_length]
    padded_seq = np.pad(
        [tokens], ((0, 0), (0, max_length - len(tokens))), mode='constant'
    )

    prediction = model.predict(padded_seq)
    predicted_label_index = np.argmax(prediction, axis=1)[0]
    predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]
    categories = predicted_label.split('|')

    if len(categories) == 3:
        main_category, sub_category, lowest_category = categories
    else:
        # Defensive fallback for labels that don't follow the 3-part scheme.
        main_category = "Unknown"
        sub_category = "Unknown"
        lowest_category = "Unknown"
    return main_category, sub_category, lowest_category
61
+
62
+
63
# Streamlit UI
def main():
    """Render the app page: a text box plus a Classify button."""
    st.title("Text Classifier")

    # Single free-text field for the document to classify.
    user_input = st.text_input("Enter text to classify")

    if st.button("Classify"):
        # Guard clause: nothing to classify yet.
        if not user_input:
            st.warning("Please enter some text.")
        else:
            # Run the full preprocess -> predict -> decode pipeline.
            main_category, sub_category, lowest_category = classify_text(user_input)
            st.success(
                f"Main Category: {main_category}, "
                f"Sub Category: {sub_category}, "
                f"Lowest Category: {lowest_category}"
            )


if __name__ == '__main__':
    main()
best_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e070eaaa1b2e0f0127d3573143c4d2d4c55c990712056fc7cf464a8f97e51502
3
+ size 475889261
label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:967cb7a8fc78d9f8cf3c7f64e239b0a53c6874319f5e75e5d4837eddaea75795
3
+ size 7004
max_length.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81a0467a2b62fcd0258bc493b39bd7a776318283536de2953ac0da9997f62dd4
3
+ size 15
requirements.txt ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ altair==5.4.1
3
+ anyio==4.4.0
4
+ argon2-cffi==23.1.0
5
+ argon2-cffi-bindings==21.2.0
6
+ arrow==1.3.0
7
+ asttokens==2.4.1
8
+ astunparse==1.6.3
9
+ async-lru==2.0.4
10
+ attrs==24.2.0
11
+ babel==2.16.0
12
+ beautifulsoup4==4.12.3
13
+ bleach==6.1.0
14
+ blinker==1.8.2
15
+ cachetools==5.5.0
16
+ certifi==2024.8.30
17
+ cffi==1.17.1
18
+ charset-normalizer==3.3.2
19
+ click==8.1.7
20
+ colorama==0.4.6
21
+ comm==0.2.2
22
+ contourpy==1.3.0
23
+ cycler==0.12.1
24
+ debugpy==1.8.5
25
+ decorator==5.1.1
26
+ defusedxml==0.7.1
27
+ executing==2.1.0
28
+ fastjsonschema==2.20.0
29
+ filelock==3.13.1
30
+ flatbuffers==24.3.25
31
+ fonttools==4.53.1
32
+ fqdn==1.5.1
33
+ fsspec==2024.2.0
34
+ gast==0.6.0
35
+ gitdb==4.0.11
36
+ GitPython==3.1.43
37
+ google-pasta==0.2.0
38
+ grpcio==1.67.0
39
+ h11==0.14.0
40
+ h5py==3.12.1
41
+ httpcore==1.0.5
42
+ httpx==0.27.2
43
+ huggingface-hub==0.26.0
44
+ idna==3.10
45
+ ipykernel==6.29.5
46
+ ipython==8.27.0
47
+ isoduration==20.11.0
48
+ jedi==0.19.1
49
+ Jinja2==3.1.4
50
+ joblib==1.4.2
51
+ json5==0.9.25
52
+ jsonpointer==3.0.0
53
+ jsonschema==4.23.0
54
+ jsonschema-specifications==2023.12.1
55
+ jupyter-events==0.10.0
56
+ jupyter-lsp==2.2.5
57
+ jupyter-server-mathjax==0.2.6
58
+ jupyter_client==8.6.2
59
+ jupyter_core==5.7.2
60
+ jupyter_server==2.14.2
61
+ jupyter_server_terminals==0.5.3
62
+ jupyterlab==4.2.5
63
+ jupyterlab_git==0.50.1
64
+ jupyterlab_pygments==0.3.0
65
+ jupyterlab_server==2.27.3
66
+ keras==3.6.0
67
+ kiwisolver==1.4.7
68
+ libclang==18.1.1
69
+ Markdown==3.7
70
+ markdown-it-py==3.0.0
71
+ MarkupSafe==2.1.5
72
+ matplotlib==3.9.2
73
+ matplotlib-inline==0.1.7
74
+ mdurl==0.1.2
75
+ mistune==3.0.2
76
+ ml-dtypes==0.4.1
77
+ mpmath==1.3.0
78
+ namex==0.0.8
79
+ narwhals==1.8.4
80
+ nbclient==0.10.0
81
+ nbconvert==7.16.4
82
+ nbdime==4.0.2
83
+ nbformat==5.10.4
84
+ nest-asyncio==1.6.0
85
+ networkx==3.2.1
86
+ nltk==3.9.1
87
+ notebook_shim==0.2.4
88
+ numpy==1.26.0
89
+ opt_einsum==3.4.0
90
+ optree==0.13.0
91
+ overrides==7.7.0
92
+ packaging==24.1
93
+ pandas==2.2.2
94
+ pandocfilters==1.5.1
95
+ parso==0.8.4
96
+ pexpect==4.9.0
97
+ pillow==10.4.0
98
+ platformdirs==4.3.3
99
+ plotly==5.24.1
100
+ prometheus_client==0.20.0
101
+ prompt_toolkit==3.0.47
102
+ protobuf==4.25.5
103
+ psutil==6.0.0
104
+ ptyprocess==0.7.0
105
+ pure_eval==0.2.3
106
+ pyarrow==17.0.0
107
+ pycparser==2.22
108
+ pydeck==0.9.1
109
+ Pygments==2.18.0
110
+ pyparsing==3.1.4
111
+ python-dateutil==2.9.0.post0
112
+ python-json-logger==2.0.7
113
+ pytz==2024.2
114
+ PyYAML==6.0.2
115
+ pyzmq==26.2.0
116
+ referencing==0.35.1
117
+ regex==2024.9.11
118
+ requests==2.32.3
119
+ rfc3339-validator==0.1.4
120
+ rfc3986-validator==0.1.1
121
+ rich==13.8.1
122
+ rpds-py==0.20.0
123
+ safetensors==0.4.5
124
+ scikit-learn==1.5.2
125
+ scipy==1.14.1
126
+ seaborn==0.13.2
127
+ Send2Trash==1.8.3
128
+ setuptools==75.1.0
129
+ six==1.16.0
130
+ smmap==5.0.1
131
+ sniffio==1.3.1
132
+ soupsieve==2.6
133
+ stack-data==0.6.3
134
+ streamlit==1.38.0
135
+ sympy==1.12
136
+ tenacity==8.5.0
137
+ tensorboard==2.17.1
138
+ tensorboard-data-server==0.7.2
139
+ tensorflow==2.17.0
140
+ termcolor==2.5.0
141
+ terminado==0.18.1
142
+ threadpoolctl==3.5.0
143
+ tinycss2==1.3.0
144
+ tokenizers==0.20.1
145
+ toml==0.10.2
146
+ torch==2.4.1+cpu
147
+ tornado==6.4.1
148
+ tqdm==4.66.5
149
+ traitlets==5.14.3
150
+ transformers==4.45.2
151
+ types-python-dateutil==2.9.0.20240906
152
+ typing_extensions==4.12.2
153
+ tzdata==2024.1
154
+ uri-template==1.3.0
155
+ urllib3==2.2.3
156
+ watchdog==4.0.2
157
+ wcwidth==0.2.13
158
+ webcolors==24.8.0
159
+ webencodings==0.5.1
160
+ websocket-client==1.8.0
161
+ Werkzeug==3.0.4
162
+ wheel==0.44.0
163
+ wrapt==1.16.0
stop_words.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4363f21909e984f52cfe3c4df4d312aa9d8442362fe1be2830963668c62f6d26
3
+ size 1310
tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c775b770e47d61ae4910728eded6e4c5c3843f1ca7f957a8a42417ac943195a4
3
+ size 18221913