File size: 6,522 Bytes
8db02ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import streamlit as st
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Configure the browser tab title and icon for the app.
st.set_page_config(page_title="News Prediction", page_icon=":earth_africa:")

# Hugging Face checkpoint shared by the tokenizer and the classifier, named
# once so the two loads cannot drift apart.
_CHECKPOINT = "hamzab/roberta-fake-news-classification"

# NOTE(review): both objects are loaded at import time; Streamlit re-executes
# the script on user interaction, so consider caching these loads if the
# installed Streamlit version offers a resource cache.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)
def predict_fake(title,text):
    """Classify a news article as fake or real with the module-level model.

    The title and body are joined into the single prompt
    ``<title>TITLE<content>TEXT<end>``, tokenized (truncated or padded to
    512 tokens) and run through the sequence-classification model without
    gradient tracking.

    Args:
        title: Headline of the article.
        text: Body of the article.

    Returns:
        A dict ``{"Fake": p_fake, "Real": p_real}`` holding the softmax
        probabilities of the first (only) batch row as Python floats.
        NOTE(review): the mapping assumes logit index 0 means "Fake" and
        index 1 means "Real" -- confirm against the checkpoint's label config.
    """
    input_str = "<title>" + title + "<content>" + text + "<end>"
    encoded = tokenizer(
        input_str,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # The inputs are moved to `device` below, so the model has to live on the
    # same device; otherwise the forward pass fails with a device mismatch on
    # CUDA hosts. `Module.to` is in-place and cheap once the model is there.
    model.to(device)

    with torch.no_grad():
        output = model(
            encoded["input_ids"].to(device),
            attention_mask=encoded["attention_mask"].to(device),
        )

    # Normalise over the class axis explicitly; a Softmax built without `dim`
    # relies on a deprecated implicit choice and emits a warning.
    probabilities = torch.softmax(output.logits, dim=-1)[0].tolist()
    return dict(zip(["Fake", "Real"], probabilities))
# Load the model
# news_model = pickle.load(open("fake_news_predictor_model.pkl", "rb"))
# vectorizer = pickle.load(open("fakeNews_tfidf_vectorizer.pkl", "rb"))
# Function for preprocessing input text
# def preProcessing(author, title, text):
# input_corpus = author +" " + title + " " + text
# input_corpus = re.sub('[^a-zA-Z]', ' ', input_corpus)
# input_corpus = input_corpus.lower()
# input_corpus = input_corpus.split()
# ps = PorterStemmer()
# input_corpus = [ps.stem(word) for word in input_corpus if not word in set(stopwords.words('english'))]
# input_corpus = ' '.join(input_corpus)
# return input_corpus
# # Function to convert text into numerical vector using TF-IDF
# def convertIntoVector(X):
# # Now converting the textual data into numerical vectors using the initialized TF-IDF vectorizer
# X = vectorizer.transform(X)
# return X
def main():
    """Render the Fake News Prediction page and handle its two forms.

    The page consists of:
      * a CSS snippet hiding Streamlit's main menu and footer,
      * a progress bar that is driven from 0 to 100,
      * a header row, and a sidebar with the project description and a form
        linking to other apps,
      * the main form, which takes a title and a text and shows the verdict
        derived from ``predict_fake``.

    Returns:
        None. All output is rendered through Streamlit.
    """
    # Hide Streamlit branding. The rules must sit inside a <style> element,
    # otherwise they are rendered as plain text instead of being applied.
    hide_st_style = (
        "<style>\n"
        "#MainMenu {visibility: hidden;}\n"
        "footer {visibility: hidden;}\n"
        "</style>"
    )
    st.markdown(hide_st_style, unsafe_allow_html=True)

    # Progress bar, advanced straight to 100% (no artificial delay).
    # NOTE(review): the loop body was reconstructed -- confirm against the
    # original file.
    bar = st.progress(0)
    for i in range(101):
        bar.progress(i)

    # Header row: small icon column next to the page title.
    col1, col2 = st.columns([1, 10])
    with col1:
        st.header(" :globe_with_meridians:")
    with col2:
        st.header("Fake News Prediction App")

    # Sidebar: app description and links.
    st.sidebar.title("Find the fake :mag_right:")
    st.sidebar.write("Welcome the NLP based fake news detector :male-detective:")
    # NOTE(review): this description talks about a logistic-regression/TF-IDF
    # model, while predictions below come from the transformer classifier used
    # by predict_fake -- the text may be outdated.
    # NOTE(review): the glyphs in front of the skill names and after
    # "Credits"/"Contact" look like mis-decoded emoji, and the e-mail address
    # appears redacted; restore them from the original file. Paragraph breaks
    # between sections were reconstructed.
    st.sidebar.write("""
This web app predicts whether a given news article is real or fake using a logistic regression model trained on a dataset containing 20,000 sample news articles with an impressive accuracy of 96%. The app employs TF-IDF vectorization and NLTK library preprocessing techniques, including lowercase conversion, regular expressions, tokenization, stemming, and merging textual data.

Skills Enhanced:
π¬ NLP
π» ML
π Python
π Data Analysis
π€ Transformers
π€ Hugging face

1. Data Acquisition: Obtained a dataset of 20,000 news articles from various sources.\n
2. Data Preprocessing: Handled missing values, tokenization, lowercase conversion, stemming, and unified text data.\n
3. Data Visualization: Used Matplotlib for heatmaps, correlation, and confusion matrices.\n
4. Model Creation: Trained a logistic regression model with TF-IDF vectorization for classification.\n
5. Evaluation: Evaluated model performance with accuracy analysis.\n
By leveraging NLP and ML, this app helps identify false information in news articles, aiding in the fight against misinformation and promoting media literacy.

**Credits** π\n
Coder: Aniket Panchal

**Contact** π§\n
For any inquiries or feedback, please contact [email protected]
""")

    # Sidebar form linking to the author's other apps.
    st.sidebar.write("Feel free to check out my other apps:")
    with st.sidebar.form("app_selection_form"):
        st.write("Feel free to explore my other apps :eyes:")
        # NOTE(review): both URLs are empty in the visible source, so the
        # redirect below never fires until they are filled in.
        app_links = {
            "Movie-mind": "",
            "Comment-Feel": "",
        }
        selected_app = st.selectbox("Choose an App", list(app_links))
        submitted_button = st.form_submit_button("Go to App")

    # Handle form submission: redirect only when a non-empty URL is known.
    if submitted_button:
        selected_app_url = app_links.get(selected_app)
        if selected_app_url:
            st.sidebar.success("Redirected successfully!")
            st.markdown(f'<meta http-equiv="refresh" content="0;URL={selected_app_url}">', unsafe_allow_html=True)

    st.sidebar.write("In case the apps are down, because of less usage")
    st.sidebar.write("Kindly reach out to me @ [email protected]")

    # Main form: collect the article to classify.
    with st.form("news_form"):
        st.subheader("Enter News Details")
        title = st.text_input("Title")
        text = st.text_area("Text")
        submit_button = st.form_submit_button("Submit")

    # Process form submission and make prediction.
    if submit_button:
        if not (title.strip() or text.strip()):
            # Nothing to classify; avoid showing a verdict for an empty article.
            st.warning("Please enter a title or some text before submitting.")
            return

        # predict_fake returns {"Fake": p, "Real": p}; indexing it with 0
        # raises KeyError, so compare the two probabilities instead.
        prediction = predict_fake(title, text)
        if prediction["Real"] > prediction["Fake"]:
            st.write("This news is predicted to be **real**.:muscle:")
        else:
            st.write("This news is predicted to be **fake**.:shit:")
if __name__ == "__main__":