File size: 6,522 Bytes
8db02ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import streamlit as st

import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Must be the first Streamlit command executed by the script.
st.set_page_config(page_title="News Prediction", page_icon=":earth_africa:")

# Hugging Face Hub id of the fine-tuned fake-news classification checkpoint.
MODEL_NAME = "hamzab/roberta-fake-news-classification"

# Streamlit re-runs this script on every user interaction, so an uncached
# from_pretrained() call would reload the checkpoint each time. Use
# st.cache_resource when the installed Streamlit provides it; otherwise fall
# back to an identity decorator so older versions keep the previous behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_classifier():
    """Load and return the ``(tokenizer, model)`` pair for ``MODEL_NAME``.

    The result is shared across reruns when ``st.cache_resource`` is
    available.
    """
    return (
        AutoTokenizer.from_pretrained(MODEL_NAME),
        AutoModelForSequenceClassification.from_pretrained(MODEL_NAME),
    )


# Module-level names used by predict_fake().
tokenizer, model = _load_classifier()

def predict_fake(title: str, text: str) -> dict:
    """Classify a news article and return the class probabilities.

    The title and body are joined into the single string
    ``"<title>{title}<content>{text}<end>"``, tokenized (truncated/padded to
    512 tokens) and passed through the module-level ``model``.

    Args:
        title: Headline of the article.
        text: Body of the article.

    Returns:
        A dict ``{"Fake": p_fake, "Real": p_real}`` holding the softmax
        probabilities of the first and second output logits respectively.
    """
    input_str = "<title>" + title + "<content>" + text + "<end>"
    encoded = tokenizer(
        input_str,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        output = model(
            encoded["input_ids"].to(device),
            attention_mask=encoded["attention_mask"].to(device),
        )

    # Normalise over the class axis explicitly; Softmax() without `dim`
    # relies on a deprecated implicit-dimension choice and emits a warning.
    probabilities = torch.softmax(output.logits, dim=-1)[0].tolist()
    return dict(zip(["Fake", "Real"], probabilities))
    


# Legacy pickled-model pipeline (disabled, kept for reference): load the model and its TF-IDF vectorizer
# news_model = pickle.load(open("fake_news_predictor_model.pkl", "rb"))
# vectorizer = pickle.load(open("fakeNews_tfidf_vectorizer.pkl", "rb"))

# Function for preprocessing input text
# def preProcessing(author, title, text):
#     input_corpus = author +" " + title + " " + text
#     input_corpus = re.sub('[^a-zA-Z]', ' ', input_corpus)
#     input_corpus = input_corpus.lower()
#     input_corpus = input_corpus.split()
#     ps = PorterStemmer()
#     input_corpus = [ps.stem(word) for word in input_corpus if not word in set(stopwords.words('english'))]
#     input_corpus = ' '.join(input_corpus)
#     return input_corpus

# # Function to convert text into numerical vector using TF-IDF
# def convertIntoVector(X):
#     # Now converting the textual data into numerical vectors using the initialized TF-IDF vectorizer
#     X = vectorizer.transform(X)
#     return X

def main():
    """Render the Streamlit page and handle both forms.

    Draws the page header, the sidebar (app description, links to other
    apps) and the news form. When the news form is submitted, the title and
    text are passed to ``predict_fake`` and the more probable label is shown.
    """
    # Hide Streamlit's default menu and footer branding.
    hide_st_style = """
                <style>
                #MainMenu {visibility: hidden;}
                footer {visibility: hidden;}
                </style>
    """
    st.markdown(hide_st_style, unsafe_allow_html=True)

    # Cosmetic progress bar; there is no delay between steps, so it simply
    # runs from 0 to 100.
    bar = st.progress(0)
    for i in range(101):
        bar.progress(i)

    # Page header: globe icon in a narrow column next to the title.
    col1, col2 = st.columns([1, 10])
    with col1:
        st.header("\t:globe_with_meridians:")
    with col2:
        st.header("Fake News Prediction App")

    # Initialize NLTK resources.
    # NOTE(review): only the commented-out TF-IDF preprocessing in this file
    # references stopwords; confirm whether this download is still needed.
    nltk.download('stopwords')

    # Sidebar: app description, credits and contact details.
    st.sidebar.title("Find the fake :mag_right:")
    st.sidebar.write("Welcome the NLP based fake news detector :male-detective:")
    st.sidebar.write("""

                This web app predicts whether a given news article is real or fake using a logistic regression model trained on a dataset containing 20,000 sample news articles with an impressive accuracy of 96%. The app employs TF-IDF vectorization and NLTK library preprocessing techniques, including lowercase conversion, regular expressions, tokenization, stemming, and merging textual data.

                Skills Enhanced:

                💬 NLP
                💻 ML
                🐍 Python
                📊 Data Analysis
                🤖 Transformers
                🤗 Hugging face
                
                    
\nSteps:   
                         
    1. Data Acquisition: Obtained a dataset of 20,000 news articles from various sources.\n
    2. Data Preprocessing: Handled missing values, tokenization, lowercase conversion, stemming, and unified text data.\n
    3. Data Visualization: Used Matplotlib for heatmaps, correlation, and confusion matrices.\n
    4. Model Creation: Trained a logistic regression model with TF-IDF vectorization for classification.\n
    5. Evaluation: Evaluated model performance with accuracy analysis.\n

By leveraging NLP and ML, this app helps identify false information in news articles, aiding in the fight against misinformation and promoting media literacy.
        
**Credits** 🌟\n
Coder: Aniket Panchal
GitHub: https://github.com/Aniket2021448

**Contact** 📧\n
For any inquiries or feedback, please contact [email protected]
    
    """)
    st.sidebar.write("Feel free to check out my other apps:")

    # Sidebar form: pick one of the author's other apps and jump to it.
    with st.sidebar.form("app_selection_form"):
        st.write("Feel free to explore my other apps :eyes:")
        app_links = {
            "Movie-mind": "https://movie-mind.streamlit.app/",
            # Fixed: the hostname previously had a stray dot ("huggingface.co./").
            "Comment-Feel": "https://huggingface.co/spaces/GoodML/Comment-Feel",
        }
        selected_app = st.selectbox("Choose an App", list(app_links.keys()))

        submitted_button = st.form_submit_button("Go to App")

    # Handle the app-selection form: emit a meta-refresh tag pointing at the
    # chosen app's URL.
    if submitted_button:
        selected_app_url = app_links.get(selected_app)
        if selected_app_url:
            st.sidebar.success("Redirected successfully!")
            st.markdown(f'<meta http-equiv="refresh" content="0;URL={selected_app_url}">', unsafe_allow_html=True)

    st.sidebar.write("In case the apps are down, because of less usage")
    st.sidebar.write("Kindly reach out to me @ [email protected]")

    # Main form: collect the article to classify.
    with st.form("news_form"):
        st.subheader("Enter News Details")
        title = st.text_input("Title")
        text = st.text_area("Text")
        submit_button = st.form_submit_button("Submit")

    # Process form submission and make the prediction.
    if submit_button:
        if not title.strip() and not text.strip():
            # Nothing to classify; avoid running the model on an empty article.
            st.warning("Please enter a title or some text before submitting.")
            return

        # predict_fake returns {"Fake": p_fake, "Real": p_real}; the previous
        # check `prediction[0] == 1` indexed that dict with 0 and raised
        # KeyError. Compare the two probabilities instead.
        prediction = predict_fake(title, text)

        st.subheader(":loudspeaker:Prediction:")
        if prediction["Real"] > prediction["Fake"]:
            st.write("This news is predicted to be **real**.:muscle:")
        else:
            st.write("This news is predicted to be **fake**.:shit:")



# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()