Spaces:

GoodML
/

FindTheFakeNews

Running

File size: 6,522 Bytes

8db02ae

import streamlit as st

import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

st.set_page_config(page_title="News Prediction", page_icon=":earth_africa:")

# Hugging Face Hub checkpoint shared by the tokenizer and the classifier, kept
# in one place so the two can never drift apart.
MODEL_NAME = "hamzab/roberta-fake-news-classification"


@st.cache_resource(show_spinner=False)
def _load_classifier():
    """Load the tokenizer and the sequence classifier for MODEL_NAME.

    Streamlit re-executes this script from the top on every user interaction.
    Caching the loaded objects as a shared resource means the checkpoint is
    loaded once instead of on every rerun. The spinner is disabled so the
    page looks the same as it did with a plain module-level load.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(MODEL_NAME),
        AutoModelForSequenceClassification.from_pretrained(MODEL_NAME),
    )


# Module-level names used by predict_fake().
tokenizer, model = _load_classifier()

def predict_fake(title,text):
    """Score a news article as fake or real with the module-level classifier.

    The title and body are wrapped in the ``<title>…<content>…<end>`` markers,
    tokenized to at most 512 tokens (padded/truncated), and run through
    ``model`` without gradient tracking.

    Args:
        title: Headline of the article.
        text: Body of the article.

    Returns:
        A dict ``{"Fake": p_fake, "Real": p_real}`` of softmax probabilities
        (Python floats). The label order assumes class index 0 is "Fake" and
        index 1 is "Real" — confirm against the checkpoint's label mapping.
    """
    input_str = "<title>" + title + "<content>" + text + "<end>"
    # Calling the tokenizer directly is the supported replacement for the
    # deprecated encode_plus() and yields the same encoding for one string.
    encoded = tokenizer(
        input_str,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    with torch.no_grad():
        output = model(
            encoded["input_ids"].to(device),
            attention_mask=encoded["attention_mask"].to(device),
        )
    # Normalise over the class dimension explicitly; Softmax() without `dim`
    # relies on a deprecated implicit choice and emits a warning on each call.
    probabilities = torch.softmax(output.logits, dim=-1)[0].tolist()
    return dict(zip(["Fake", "Real"], probabilities))
    


# Legacy pickled model + TF-IDF pipeline, kept for reference only (disabled)
# news_model = pickle.load(open("fake_news_predictor_model.pkl", "rb"))
# vectorizer = pickle.load(open("fakeNews_tfidf_vectorizer.pkl", "rb"))

# Function for preprocessing input text
# def preProcessing(author, title, text):
#     input_corpus = author +" " + title + " " + text
#     input_corpus = re.sub('[^a-zA-Z]', ' ', input_corpus)
#     input_corpus = input_corpus.lower()
#     input_corpus = input_corpus.split()
#     ps = PorterStemmer()
#     input_corpus = [ps.stem(word) for word in input_corpus if not word in set(stopwords.words('english'))]
#     input_corpus = ' '.join(input_corpus)
#     return input_corpus

# # Function to convert text into numerical vector using TF-IDF
# def convertIntoVector(X):
#     # Now converting the textual data into numerical vectors using the initialized TF-IDF vectorizer
#     X = vectorizer.transform(X)
#     return X

def main():
    """Render the Streamlit page and show a fake/real verdict for a submission.

    The page consists of a header, a sidebar (description, credits, and a
    small form that redirects to other apps), and the main form that collects
    a title and article text. When the main form is submitted, the inputs are
    scored with predict_fake() and the more probable label is displayed.
    """
    # Hide Streamlit's main menu and footer.
    hide_st_style = """
                <style>
                #MainMenu {visibility: hidden;}
                footer {visibility: hidden;}
                </style>
    """
    st.markdown(hide_st_style, unsafe_allow_html=True)

    # Progress bar; with no delay between updates it fills immediately.
    bar = st.progress(0)
    for i in range(101):
        bar.progress(i)

    # Page header: icon column next to the title column.
    col1, col2 = st.columns([1, 10])
    with col1:
        st.header("\t:globe_with_meridians:")
    with col2:
        st.header("Fake News Prediction App")

    # NOTE(review): stopwords are only referenced by the commented-out legacy
    # preprocessing in this file; this download looks unnecessary — confirm
    # before removing.
    nltk.download('stopwords')

    # Sidebar: app description, credits and contact.
    # NOTE(review): the copy below describes a logistic-regression/TF-IDF
    # model, while predictions come from predict_fake() — confirm the text is
    # still accurate.
    st.sidebar.title("Find the fake :mag_right:")
    st.sidebar.write("Welcome the NLP based fake news detector :male-detective:")
    st.sidebar.write("""

                This web app predicts whether a given news article is real or fake using a logistic regression model trained on a dataset containing 20,000 sample news articles with an impressive accuracy of 96%. The app employs TF-IDF vectorization and NLTK library preprocessing techniques, including lowercase conversion, regular expressions, tokenization, stemming, and merging textual data.

                Skills Enhanced:

                💬 NLP
                💻 ML
                🐍 Python
                📊 Data Analysis
                🤖 Transformers
                🤗 Hugging face
                
                    
\nSteps:   
                         
    1. Data Acquisition: Obtained a dataset of 20,000 news articles from various sources.\n
    2. Data Preprocessing: Handled missing values, tokenization, lowercase conversion, stemming, and unified text data.\n
    3. Data Visualization: Used Matplotlib for heatmaps, correlation, and confusion matrices.\n
    4. Model Creation: Trained a logistic regression model with TF-IDF vectorization for classification.\n
    5. Evaluation: Evaluated model performance with accuracy analysis.\n

By leveraging NLP and ML, this app helps identify false information in news articles, aiding in the fight against misinformation and promoting media literacy.
        
**Credits** 🌟\n
Coder: Aniket Panchal
GitHub: https://github.com/Aniket2021448

**Contact** 📧\n
For any inquiries or feedback, please contact [email protected]
    
    """)
    st.sidebar.write("Feel free to check out my other apps:")

    # Sidebar form for jumping to one of the author's other apps.
    with st.sidebar.form("app_selection_form"):
        st.write("Feel free to explore my other apps :eyes:")
        # NOTE(review): the Comment-Feel host has a trailing dot ("co.") —
        # looks like a typo; confirm before changing the URL.
        app_links = {
            "Movie-mind": "https://movie-mind.streamlit.app/",
            "Comment-Feel": "https://huggingface.co./spaces/GoodML/Comment-Feel"
        }
        selected_app = st.selectbox("Choose an App", list(app_links.keys()))

        submitted_button = st.form_submit_button("Go to App")

    # Redirect with a meta-refresh tag; the URL comes from the fixed mapping
    # above, never from user-typed text.
    if submitted_button:
        selected_app_url = app_links.get(selected_app)
        if selected_app_url:
            st.sidebar.success("Redirected successfully!")
            st.markdown(f'<meta http-equiv="refresh" content="0;URL={selected_app_url}">', unsafe_allow_html=True)

    st.sidebar.write("In case the apps are down, because of less usage")
    st.sidebar.write("Kindly reach out to me @ [email protected]")

    # Main form collecting the article to classify.
    with st.form("news_form"):
        st.subheader("Enter News Details")
        title = st.text_input("Title")
        text = st.text_area("Text")
        submit_button = st.form_submit_button("Submit")

    # Score the submission and report the more probable label.
    if submit_button:
        prediction = predict_fake(title, text)

        st.subheader(":loudspeaker:Prediction:")
        # predict_fake() returns {"Fake": p, "Real": p}; the previous
        # `prediction[0] == 1` check raised KeyError on that dict, so compare
        # the two probabilities instead. Ties fall through to "fake".
        if prediction["Real"] > prediction["Fake"]:
            st.write("This news is predicted to be **real**.:muscle:")
        else:
            st.write("This news is predicted to be **fake**.:shit:")



# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()