import streamlit as st
import joblib
import re
import nltk
from urllib.parse import urlparse
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download the NLTK resources used below
nltk.download('omw-1.4')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')  # required by word_tokenize on newer NLTK releases
nltk.download('stopwords')

stop_words = set(stopwords.words("english"))  # Set of English stopwords
lemmatizer = WordNetLemmatizer()              # WordNet lemmatizer


def textProcess(sent):
    """Clean and normalize raw text before TF-IDF vectorization."""
    try:
        if sent is None:
            return ""

        # Replace square brackets and parentheses with spaces
        sent = re.sub('[][)(]', ' ', sent)

        # Drop tokens that parse as URLs (i.e. carry a scheme such as http/https)
        sent = ' '.join(word for word in sent.split() if not urlparse(word).scheme)

        # Remove Twitter-style usernames (words starting with @)
        sent = re.sub(r'@\w+', '', sent)

        # Strip HTML tags
        sent = re.sub(r'<.*?>', '', sent)

        # Keep only letters and digits, then lowercase
        sent = re.sub('[^A-Za-z0-9]', ' ', sent).lower()

        # Tokenize, drop stopwords, and lemmatize what remains
        tokens = word_tokenize(sent)
        tokens = [word for word in tokens if word not in stop_words]
        return ' '.join(lemmatizer.lemmatize(word) for word in tokens)
    except Exception as ex:
        print(sent, "\n")
        print("Error:", ex)
        return ""  # Fall back to an empty string if processing fails


# Load the pre-trained classifier and the TF-IDF vectorizer fitted during training
model = joblib.load('Stress identification NLP')
tfidf_vectorizer = joblib.load('tfidf_vectorizer.joblib')


def main():
    st.title("Stress Predictor Web App")
    st.write("Enter some text to predict whether the person is stressed.")

    # Input text box
    user_input = st.text_area("Enter text here:")

    if st.button("Predict"):
        if user_input:
            # Apply the same preprocessing and vectorizer used during training
            processed_text = textProcess(user_input)
            tfidf_text = tfidf_vectorizer.transform([processed_text])

            # Predict: 1 = stressed, 0 = not stressed
            prediction = model.predict(tfidf_text)[0]
            if prediction == 1:
                result = "This person is in stress."
            else:
                result = "This person is not in stress."
            st.write(result)


if __name__ == '__main__':
    main()
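
# Illustrative check of the preprocessing pipeline above. The example input
# and output are assumptions for demonstration; the exact tokens kept depend
# on the installed NLTK stopword list and WordNet data:
# >>> textProcess("@user I can't handle (all) this pressure! https://example.com")
# 'handle pressure'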
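
# --- Hypothetical training sketch -------------------------------------------
# The app loads two artifacts from disk: a classifier saved as
# 'Stress identification NLP' and a fitted TF-IDF vectorizer saved as
# 'tfidf_vectorizer.joblib'. A minimal sketch of how such artifacts could be
# produced is given below; the CSV path, the 'text'/'label' column names, and
# the LogisticRegression classifier are assumptions for illustration, not
# taken from the original project.
def train_and_save(csv_path="stress.csv"):
    import pandas as pd
    from sklearn.linear_model import LogisticRegression

    df = pd.read_csv(csv_path)                    # assumed columns: 'text', 'label'
    texts = [textProcess(t) for t in df["text"]]  # reuse the same cleaning as inference
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(texts)           # fit TF-IDF on the training corpus
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X, df["label"])                       # assumed labels: 1 = stressed, 0 = not
    joblib.dump(clf, "Stress identification NLP")
    joblib.dump(vectorizer, "tfidf_vectorizer.joblib")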
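
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py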