"""Streamlit app that classifies a news article as real or fake.

The classifier is the pretrained ``hamzab/roberta-fake-news-classification``
sequence-classification model loaded through Hugging Face ``transformers``.
The user enters a title and body text in a form; the app shows which of the
two classes ("Fake" / "Real") received the higher probability.
"""

import pickle
import re

import nltk
import streamlit as st
import torch
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# NOTE: pickle / re / nltk / sklearn are only referenced by the legacy
# pipeline kept in comments further down; the imports are retained as-is.

# Must be the first Streamlit command executed by the script.
st.set_page_config(page_title="News Prediction", page_icon=":earth_africa:")

MODEL_NAME = "hamzab/roberta-fake-news-classification"
# Order of the model's output logits, as used when building the result dict.
LABELS = ["Fake", "Real"]

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# The model has to live on the same device the input tensors are sent to;
# previously only the inputs were moved, which fails when CUDA is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# CSS that hides Streamlit's main menu and footer.
HIDE_STREAMLIT_STYLE = (
    " <style> #MainMenu \n"
    "{visibility: hidden;} footer {visibility: hidden;} </style> "
)

# NOTE(review): this description still talks about the earlier TF-IDF +
# logistic-regression pipeline, while predictions now come from the RoBERTa
# model above. The text is user-facing and left unchanged here.
SIDEBAR_DESCRIPTION = (
    " This web app predicts whether a given news article is real or fake "
    "using a logistic regression model trained on a dataset containing "
    "20,000 sample news articles with an impressive accuracy of 96%. "
    "The app employs TF-IDF vectorization and NLTK library preprocessing "
    "techniques, including lowercase conversion, regular expressions, "
    "tokenization, stemming, and merging textual data. "
    "Skills Enhanced: 💬 NLP 💻 ML 🐍 Python 📊 Data Analysis "
    "🤖 Transformers 🤗 Hugging face \n"
    "Steps: 1. Data Acquisition: Obtained a dataset of 20,000 news articles "
    "from various sources.\n"
    " 2. Data Preprocessing: Handled missing values, tokenization, lowercase "
    "conversion, stemming, and unified text data.\n"
    " 3. Data Visualization: Used Matplotlib for heatmaps, correlation, and "
    "confusion matrices.\n"
    " 4. Model Creation: Trained a logistic regression model with TF-IDF "
    "vectorization for classification.\n"
    " 5. Evaluation: Evaluated model performance with accuracy analysis.\n"
    " By leveraging NLP and ML, this app helps identify false information in "
    "news articles, aiding in the fight against misinformation and promoting "
    "media literacy. \n"
    "**Credits** 🌟\n"
    " Coder: Aniket Panchal GitHub: **Contact** 📧\n"
    " For any inquiries or feedback, please contact "
)

# Links offered in the sidebar "other apps" selector (URLs are empty here).
APP_LINKS = {
    "Movie-mind": "",
    "Comment-Feel": "",
}


def predict_fake(title, text):
    """Return the class probabilities for a news article.

    Args:
        title: Headline of the article.
        text: Body of the article.

    Returns:
        A dict ``{"Fake": p_fake, "Real": p_real}`` holding the softmax
        probabilities (plain Python floats) of the two model outputs.
    """
    # NOTE(review): the leading "<title>" marker is restored from what the
    # model card's usage example is believed to show; the file previously
    # had an empty string in its place. Confirm against the model card.
    input_str = "<title>" + title + "<content>" + text + "<end>"
    encoded = tokenizer(
        input_str,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model(
            encoded["input_ids"].to(device),
            attention_mask=encoded["attention_mask"].to(device),
        )
    # Explicit dim: softmax over the class axis of the (batch, classes) logits.
    probabilities = torch.softmax(output.logits, dim=-1)[0].tolist()
    return dict(zip(LABELS, probabilities))


# --- Legacy pipeline (TF-IDF + pickled model), kept for reference ----------
# Load the model
# news_model = pickle.load(open("fake_news_predictor_model.pkl", "rb"))
# vectorizer = pickle.load(open("fakeNews_tfidf_vectorizer.pkl", "rb"))
#
# Function for preprocessing input text
# def preProcessing(author, title, text):
#     input_corpus = author + " " + title + " " + text
#     input_corpus = re.sub('[^a-zA-Z]', ' ', input_corpus)
#     input_corpus = input_corpus.lower()
#     input_corpus = input_corpus.split()
#     ps = PorterStemmer()
#     input_corpus = [ps.stem(word) for word in input_corpus
#                     if not word in set(stopwords.words('english'))]
#     input_corpus = ' '.join(input_corpus)
#     return input_corpus
#
# Function to convert text into numerical vector using TF-IDF
# def convertIntoVector(X):
#     # Convert the textual data into numerical vectors using the
#     # initialized TF-IDF vectorizer
#     X = vectorizer.transform(X)
#     return X
# ---------------------------------------------------------------------------


def _hide_streamlit_branding():
    """Inject the CSS that hides Streamlit's menu and footer."""
    st.markdown(HIDE_STREAMLIT_STYLE, unsafe_allow_html=True)


def _render_header():
    """Draw the progress bar and the page header."""
    bar = st.progress(0)
    for i in range(101):
        bar.progress(i)
        # time.sleep(0.02)  # Adjust the sleep time for the desired speed
    # st.balloons()

    col1, col2 = st.columns([1, 10])
    with col1:
        st.header(" :globe_with_meridians:")
    with col2:
        st.header("Fake News Prediction App")


def _render_sidebar():
    """Draw the sidebar: app description, credits and links to other apps."""
    st.sidebar.title("Find the fake :mag_right:")
    st.sidebar.write("Welcome the NLP based fake news detector :male-detective:")
    st.sidebar.write(SIDEBAR_DESCRIPTION)

    st.sidebar.write("Feel free to check out my other apps:")
    with st.sidebar.form("app_selection_form"):
        st.write("Feel free to explore my other apps :eyes:")
        selected_app = st.selectbox("Choose an App", list(APP_LINKS.keys()))
        submitted_button = st.form_submit_button("Go to App")

    # Handle form submission: redirect through a meta-refresh tag, but only
    # when a non-empty URL is configured for the selected app.
    if submitted_button:
        selected_app_url = APP_LINKS.get(selected_app)
        if selected_app_url:
            st.sidebar.success("Redirected successfully!")
            st.markdown(
                f'<meta http-equiv="refresh" content="0;URL={selected_app_url}">',
                unsafe_allow_html=True,
            )

    st.sidebar.write("In case the apps are down, because of less usage")
    st.sidebar.write("Kindly reach out to me @")


def _render_prediction_form():
    """Draw the news input form and show the prediction after submission."""
    with st.form("news_form"):
        st.subheader("Enter News Details")
        # author = st.text_input("Author Name")
        title = st.text_input("Title")
        text = st.text_area("Text")
        submit_button = st.form_submit_button("Submit")

    if not submit_button:
        return

    # Nothing to classify: ask for input instead of running the model on
    # an empty article.
    if not (title.strip() or text.strip()):
        st.warning("Please enter a title or some text before submitting.")
        return

    prediction = predict_fake(title, text)

    st.subheader(":loudspeaker:Prediction:")
    # predict_fake returns a dict of probabilities keyed by label, so pick
    # the more probable class (indexing it with 0 raised KeyError).
    if prediction["Real"] > prediction["Fake"]:
        st.write("This news is predicted to be **real**.:muscle:")
    else:
        st.write("This news is predicted to be **fake**.:shit:")


def main():
    """Render the whole page: styling, header, sidebar and prediction form."""
    _hide_streamlit_branding()
    _render_header()
    _render_sidebar()
    _render_prediction_form()


if __name__ == "__main__":
    main()