Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files- .gitattributes +1 -0
- app.py +24 -0
- model_rnn/fingerprint.pb +3 -0
- model_rnn/keras_metadata.pb +3 -0
- model_rnn/saved_model.pb +3 -0
- model_rnn/variables/variables.data-00000-of-00001 +3 -0
- model_rnn/variables/variables.index +0 -0
- prediction.py +82 -0
- requirements.txt +9 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
model_rnn/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
import streamlit as st

import prediction

# Configure the browser tab title and the overall page layout.
st.set_page_config(
    page_title="Notes Text Classification",
    layout='wide',
    initial_sidebar_state='expanded',
)

# Sidebar navigation between the landing page and the prediction tool.
st.sidebar.title("Choose a page:")
page = st.sidebar.selectbox("", ('Landing Page', 'Data Prediction'))

if page == 'Data Prediction':
    # Hand control over to the prediction module's UI.
    prediction.run()
else:
    # Landing page: headline, tagline, and an illustrative image.
    st.title("What category does this note belong to?")
    st.subheader("Find out the category with this space that uses NLP to do predictions.")
    st.image("https://imageio.forbes.com/specials-images/imageserve/60808d87824ab7edc3770486/Note-Pad-and-Pen-on-Yellow-background/960x0.jpg?height=474&width=711&fit=bounds")
|
model_rnn/fingerprint.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37d606dfcfb95b10bb426d66179918955507942eef68355640a24ec95fd18535
|
3 |
+
size 57
|
model_rnn/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c769156491524cd04ccbcc89614a1c17acee03d76d4e99e6e73b6655a13a022f
|
3 |
+
size 35540
|
model_rnn/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92fcd21f1a911fc986008fe7fd965c96a46293d0dd31546d0feddfc6e249be0d
|
3 |
+
size 5117841
|
model_rnn/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3077fbe89ebff4ce89b4501a9c4b34782544026b7c815a3fc6e772e1b324a023
|
3 |
+
size 275723988
|
model_rnn/variables/variables.index
ADDED
Binary file (4.25 kB). View file
|
|
prediction.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
import re

import nltk
# Fetch the NLTK resources needed below (no-ops when already present).
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Load the trained RNN classifier from the SavedModel directory.
loaded_model = load_model('model_rnn')

# Map the model's integer class indices (0-16) to category names.
label_dict = dict(enumerate([
    'Uang Masuk', 'Uang Keluar', 'Pinjaman', 'Tagihan', 'Top Up',
    'Biaya & Lainnya', 'Transportasi', 'Pendidikan', 'Hadiah & Amal',
    'Belanja', 'Hiburan', 'Makanan & Minuman', 'Kesehatan',
    'Perawatan Diri', 'Hobi & Gaya Hidup', 'Pencairan Investasi',
    'Tabungan & Investasi',
]))
|
23 |
+
|
24 |
+
def preprocessing(text):
    """Normalize raw note text for the classifier.

    Pipeline: lowercase, strip digits/punctuation/extra whitespace,
    tokenize, remove Indonesian stopwords (plus a few informal and
    English extras), lemmatize with WordNet, and rejoin.

    Args:
        text: Raw input string.

    Returns:
        The cleaned text as a single space-joined string.
    """
    # Case-fold, then strip digits, punctuation and redundant whitespace.
    cleaned = re.sub(r'\d+', '', text.lower())
    cleaned = re.sub(r'[^\w\s]', '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # Indonesian stopwords plus informal tokens observed in the data.
    ignore = set(stopwords.words('indonesian'))
    ignore.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', "and"])

    lemmatizer = WordNetLemmatizer()

    # Tokenize, drop stopwords, lemmatize the survivors.
    kept = [lemmatizer.lemmatize(tok)
            for tok in word_tokenize(cleaned)
            if tok not in ignore]

    return ' '.join(kept)
|
54 |
+
|
55 |
+
def run():
    """Render the Streamlit prediction page: read a note, predict its category."""
    st.title('Notes Categorization')

    # Pre-filled example so the page demonstrates itself.
    default = "konser twice"
    user_input = st.text_area("Enter the notes text here:", default, height=50)

    if st.button('Predict'):
        # Clean the raw note the same way the training data was cleaned.
        cleaned = preprocessing(user_input)

        # The model predicts on batches, so wrap the single sample in an
        # extra leading dimension.
        batch = np.expand_dims(cleaned, axis=0)

        # Probability scores over all categories for this one sample.
        scores = loaded_model.predict(batch)

        # Decode the highest-probability class index into its category name.
        predicted_category = label_dict[np.argmax(scores[0])]

        st.write(f'The predicted category is: {predicted_category}')
|
80 |
+
|
81 |
+
if __name__ == '__main__':
    # BUG FIX: the original called main(), which is not defined anywhere in
    # this module and raised NameError when run directly. The entry point
    # defined above is run().
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas==2.1.4
|
2 |
+
streamlit==1.29.0
|
3 |
+
numpy==1.26.3
|
4 |
+
matplotlib==3.8.0
|
5 |
+
plotly==5.9.0
|
6 |
+
seaborn==0.12.2
|
7 |
+
wordcloud==1.9.3
|
8 |
+
nltk==3.8.1
|
9 |
+
tensorflow==2.15.0
|