Spaces:

bainskarman
/

WiqiClassification

Sleeping

App Files Files Community

bainskarman committed on Oct 19, 2024

Commit

f72805a

verified ·

1 Parent(s): 3d9b27d

Upload 7 files

Browse files

Files changed (8) hide show

.gitattributes +1 -0
app.py +79 -0
best_model.keras +3 -0
label_encoder.pkl +3 -0
max_length.pkl +3 -0
requirements.txt +163 -0
stop_words.pkl +3 -0
tokenizer.pkl +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+best_model.keras filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import streamlit as st
+import pickle
+import numpy as np
+import os
+from tensorflow.keras.models import load_model
+import numpy as np
+import pandas as pd
+import re
+import nltk
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+import matplotlib.pyplot as plt
+import seaborn as sns
+model = load_model('best_model.keras')
+# Load the tokenizer
+with open('tokenizer.pkl' ,'rb') as f:
+    tokenizer = pickle.load(f)
+# Load the label encoder
+with open('label_encoder.pkl', 'rb') as f:
+    label_encoder = pickle.load(f)
+# Load max_length
+with open('max_length.pkl', 'rb') as f:
+    max_length = pickle.load(f)
+# Load stop words
+with open('stop_words.pkl', 'rb') as f:
+    stop_words = pickle.load(f)
+lemmatizer = WordNetLemmatizer()
+def preprocess_text(text):
+    text = str(text)
+    text = text.lower()
+    text = re.sub(r'[^a-z\s]', '', text)
+    words = text.split()
+    st_words = stop_words
+    words = [word for word in words if word not in stop_words]
+    words = [lemmatizer.lemmatize(word) for word in words]
+    text = ' '.join(words)
+    return text
+def classify_text(text):
+    text = preprocess_text(text)
+    seq = tokenizer.texts_to_sequences([text])
+    padded_seq = np.pad(seq, ((0, 0), (0, max_length - len(seq[0]))), mode='constant')
+    prediction = model.predict(padded_seq)
+    predicted_label_index = np.argmax(prediction, axis=1)[0]
+    predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]
+    categories = predicted_label.split('|')
+    if len(categories) == 3:
+        main_category = categories[0]
+        sub_category = categories[1]
+        lowest_category = categories[2]
+    else:
+        main_category = "Unknown"
+        sub_category = "Unknown"
+        lowest_category = "Unknown"
+    return main_category, sub_category, lowest_category
+# Streamlit UI
+def main():
+    st.title("Text Classifier")
+    # Text input
+    user_input = st.text_input("Enter text to classify")
+    if st.button("Classify"):
+        if user_input:
+            # Classify input text
+           main_category, sub_category, lowest_category = classify_text(user_input)
+           st.success(f"Main Category: {main_category}, Sub Category: {sub_category}, Lowest Category: {lowest_category}")
+        else:
+            st.warning("Please enter some text.")
+if __name__ == '__main__':
+    main()

best_model.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e070eaaa1b2e0f0127d3573143c4d2d4c55c990712056fc7cf464a8f97e51502
+size 475889261

label_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:967cb7a8fc78d9f8cf3c7f64e239b0a53c6874319f5e75e5d4837eddaea75795
+size 7004

max_length.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81a0467a2b62fcd0258bc493b39bd7a776318283536de2953ac0da9997f62dd4
+size 15

requirements.txt ADDED Viewed

	@@ -0,0 +1,163 @@

+absl-py==2.1.0
+altair==5.4.1
+anyio==4.4.0
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+astunparse==1.6.3
+async-lru==2.0.4
+attrs==24.2.0
+babel==2.16.0
+beautifulsoup4==4.12.3
+bleach==6.1.0
+blinker==1.8.2
+cachetools==5.5.0
+certifi==2024.8.30
+cffi==1.17.1
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+comm==0.2.2
+contourpy==1.3.0
+cycler==0.12.1
+debugpy==1.8.5
+decorator==5.1.1
+defusedxml==0.7.1
+executing==2.1.0
+fastjsonschema==2.20.0
+filelock==3.13.1
+flatbuffers==24.3.25
+fonttools==4.53.1
+fqdn==1.5.1
+fsspec==2024.2.0
+gast==0.6.0
+gitdb==4.0.11
+GitPython==3.1.43
+google-pasta==0.2.0
+grpcio==1.67.0
+h11==0.14.0
+h5py==3.12.1
+httpcore==1.0.5
+httpx==0.27.2
+huggingface-hub==0.26.0
+idna==3.10
+ipykernel==6.29.5
+ipython==8.27.0
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.4
+joblib==1.4.2
+json5==0.9.25
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter-server-mathjax==0.2.6
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyter_server==2.14.2
+jupyter_server_terminals==0.5.3
+jupyterlab==4.2.5
+jupyterlab_git==0.50.1
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+keras==3.6.0
+kiwisolver==1.4.7
+libclang==18.1.1
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==3.0.2
+ml-dtypes==0.4.1
+mpmath==1.3.0
+namex==0.0.8
+narwhals==1.8.4
+nbclient==0.10.0
+nbconvert==7.16.4
+nbdime==4.0.2
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.2.1
+nltk==3.9.1
+notebook_shim==0.2.4
+numpy==1.26.0
+opt_einsum==3.4.0
+optree==0.13.0
+overrides==7.7.0
+packaging==24.1
+pandas==2.2.2
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.4.0
+platformdirs==4.3.3
+plotly==5.24.1
+prometheus_client==0.20.0
+prompt_toolkit==3.0.47
+protobuf==4.25.5
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==17.0.0
+pycparser==2.22
+pydeck==0.9.1
+Pygments==2.18.0
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-json-logger==2.0.7
+pytz==2024.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+referencing==0.35.1
+regex==2024.9.11
+requests==2.32.3
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.8.1
+rpds-py==0.20.0
+safetensors==0.4.5
+scikit-learn==1.5.2
+scipy==1.14.1
+seaborn==0.13.2
+Send2Trash==1.8.3
+setuptools==75.1.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.6
+stack-data==0.6.3
+streamlit==1.38.0
+sympy==1.12
+tenacity==8.5.0
+tensorboard==2.17.1
+tensorboard-data-server==0.7.2
+tensorflow==2.17.0
+termcolor==2.5.0
+terminado==0.18.1
+threadpoolctl==3.5.0
+tinycss2==1.3.0
+tokenizers==0.20.1
+toml==0.10.2
+torch==2.4.1+cpu
+tornado==6.4.1
+tqdm==4.66.5
+traitlets==5.14.3
+transformers==4.45.2
+types-python-dateutil==2.9.0.20240906
+typing_extensions==4.12.2
+tzdata==2024.1
+uri-template==1.3.0
+urllib3==2.2.3
+watchdog==4.0.2
+wcwidth==0.2.13
+webcolors==24.8.0
+webencodings==0.5.1
+websocket-client==1.8.0
+Werkzeug==3.0.4
+wheel==0.44.0
+wrapt==1.16.0

stop_words.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4363f21909e984f52cfe3c4df4d312aa9d8442362fe1be2830963668c62f6d26
+size 1310

tokenizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c775b770e47d61ae4910728eded6e4c5c3843f1ca7f957a8a42417ac943195a4
+size 18221913