# NOTE: the captured page began with the banner "Spaces: Runtime error /
# Runtime error"; it is not part of the program.
# pickle loads the serialized Python objects (scikit-learn models)
import pickle
import string

import nltk
import sklearn
import streamlit as st
from nltk.data import load
from sklearn.feature_extraction.text import TfidfVectorizer
from TurkishStemmer import TurkishStemmer
# Fetch the tokenizer data that nltk.word_tokenize needs at runtime.
# "punkt" is the resource the script originally requested; "punkt_tab" is the
# resource newer NLTK releases look up instead, so both are requested to keep
# the app working across NLTK versions.
nltk.download("punkt")
nltk.download("punkt_tab")
# Translation table for str.translate() that deletes every ASCII punctuation
# character and digit (each code point maps to None).
trans_table = str.maketrans("", "", string.punctuation + string.digits)
def custom_tokenizer_with_Turkish_stemmer(text):
    """Tokenize *text* and reduce every token to its Turkish stem.

    Punctuation and digits are removed with the module-level ``trans_table``,
    the remainder is split with ``nltk.word_tokenize``, and each token is
    lower-cased and passed through the module-level ``stemmerTR``.

    Args:
        text: The raw message as a string.

    Returns:
        A list with one stem per token, in token order.
    """
    # NOTE(review): nothing in this file calls this function directly; it is
    # presumably the tokenizer referenced by the pickled TF-IDF vectorizer, so
    # its name and output should stay stable - confirm against the training
    # code before changing either.
    tokens = nltk.word_tokenize(text.translate(trans_table))
    return [stemmerTR.stem(token.lower()) for token in tokens]
def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # pickle can execute arbitrary code while loading, so only trusted files
    # should ever be placed at these paths.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def predictSMSdata(test_text):
    """Classify a single SMS text and return its category name.

    The classifier and the TF-IDF vectorizer are loaded from pickle files in
    the current working directory on every call. The text is vectorized, the
    classifier predicts a value for it, and that value is used as an index
    into the alphabetically sorted category list.

    Args:
        test_text: The message to classify, as a string.

    Returns:
        Either ``"legitimate"`` or ``"spam"``.

    Raises:
        FileNotFoundError: If either pickle file is missing.
    """
    categories = sorted(["legitimate", "spam"])

    classifier = _load_pickle("LinearSVC_SMS_spam_TR.pickle")
    tfidf_vectorizer = _load_pickle("tfidf_vectorizer_TR.pickle")

    # The vectorizer takes a collection of documents, so wrap the single
    # message in a list and read back the first (only) prediction.
    features = tfidf_vectorizer.transform([test_text])
    predicted = classifier.predict(features)
    return categories[predicted[0]]
# Stemmer used by custom_tokenizer_with_Turkish_stemmer, which looks it up as
# a module-level name when it runs.
stemmerTR = TurkishStemmer()

# Sample message pre-filled in the text box.
default_value = "Aveadan SUPER bir Muzik Paketi! MAXI yaz, 5555e gonder"
text = st.text_area("enter some text!", default_value)

# Classify and display the result only when the box is non-empty.
if text:
    out = predictSMSdata(text)
    st.write("The category of SMS = " + out.upper())