import streamlit as st
import joblib
import re
import nltk
from urllib.parse import urlparse
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download the NLTK resources used below
nltk.download('omw-1.4')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')  # required by word_tokenize on newer NLTK releases
nltk.download('stopwords')

stop_words = set(stopwords.words("english"))  # Set of English stopwords
lemmatizer = WordNetLemmatizer()              # WordNet lemmatizer


def textProcess(sent):
    """Clean and normalize raw text before TF-IDF vectorization."""
    try:
        if sent is None:
            return ""

        # Replace square brackets and parentheses with spaces
        sent = re.sub('[][)(]', ' ', sent)

        # Drop tokens that parse as URLs (i.e. carry a scheme such as http/https)
        sent = ' '.join(word for word in sent.split() if not urlparse(word).scheme)

        # Remove Twitter-style usernames (words starting with @)
        sent = re.sub(r'@\w+', '', sent)

        # Strip HTML tags
        sent = re.sub(r'<.*?>', '', sent)

        # Keep only letters and digits, then lowercase
        sent = re.sub('[^A-Za-z0-9]', ' ', sent).lower()

        # Tokenize, drop stopwords, and lemmatize what remains
        tokens = word_tokenize(sent)
        tokens = [word for word in tokens if word not in stop_words]
        return ' '.join(lemmatizer.lemmatize(word) for word in tokens)
    except Exception as ex:
        print(sent, "\n")
        print("Error:", ex)
        return ""  # Fall back to an empty string if processing fails


# Load the pre-trained classifier and the TF-IDF vectorizer fitted during training
model = joblib.load('Stress identification NLP')
tfidf_vectorizer = joblib.load('tfidf_vectorizer.joblib')


def main():
    st.title("Stress Predictor Web App")
    st.write("Enter some text to predict whether the person is stressed.")

    # Input text box
    user_input = st.text_area("Enter text here:")

    if st.button("Predict"):
        if user_input:
            # Apply the same preprocessing and vectorizer used during training
            processed_text = textProcess(user_input)
            tfidf_text = tfidf_vectorizer.transform([processed_text])

            # Predict: 1 = stressed, 0 = not stressed
            prediction = model.predict(tfidf_text)[0]
            if prediction == 1:
                result = "This person is in stress."
            else:
                result = "This person is not in stress."
            st.write(result)


if __name__ == '__main__':
    main()
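
# Illustrative check of the preprocessing pipeline above. The example input
# and output are assumptions for demonstration; the exact tokens kept depend
# on the installed NLTK stopword list and WordNet data:
# >>> textProcess("@user I can't handle (all) this pressure! https://example.com")
# 'handle pressure'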
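
# --- Hypothetical training sketch -------------------------------------------
# The app loads two artifacts from disk: a classifier saved as
# 'Stress identification NLP' and a fitted TF-IDF vectorizer saved as
# 'tfidf_vectorizer.joblib'. A minimal sketch of how such artifacts could be
# produced is given below; the CSV path, the 'text'/'label' column names, and
# the LogisticRegression classifier are assumptions for illustration, not
# taken from the original project.
def train_and_save(csv_path="stress.csv"):
    import pandas as pd
    from sklearn.linear_model import LogisticRegression

    df = pd.read_csv(csv_path)                    # assumed columns: 'text', 'label'
    texts = [textProcess(t) for t in df["text"]]  # reuse the same cleaning as inference
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(texts)           # fit TF-IDF on the training corpus
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X, df["label"])                       # assumed labels: 1 = stressed, 0 = not
    joblib.dump(clf, "Stress identification NLP")
    joblib.dump(vectorizer, "tfidf_vectorizer.joblib")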
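
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py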