GoodML committed on
Commit
8db02ae
·
verified ·
1 Parent(s): 62730e0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -0
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ import re
4
+ import nltk
5
+ from nltk.corpus import stopwords
6
+ from nltk.stem.porter import PorterStemmer
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ import pickle
9
+ import torch
10
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
11
+
12
# Page configuration is issued before any other Streamlit command in the script.
st.set_page_config(page_title="News Prediction", page_icon=":earth_africa:")

# Hugging Face Hub identifier of the fine-tuned RoBERTa classifier.
MODEL_NAME = "hamzab/roberta-fake-news-classification"


@st.cache_resource
def _load_tokenizer_and_model():
    """Load the tokenizer and classifier once per server process.

    Streamlit re-executes this script on every user interaction; caching the
    loaded objects avoids re-instantiating the model on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from ``MODEL_NAME``.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    return loaded_tokenizer, loaded_model


# Module-level names used by predict_fake().
tokenizer, model = _load_tokenizer_and_model()
18
+
19
def predict_fake(title, text):
    """Score a news article with the module-level sequence classifier.

    The title and body are wrapped in the ``<title>...<content>...<end>``
    template, tokenized (truncated/padded to 512 tokens) and run through the
    model without gradient tracking.

    Args:
        title: Headline of the article.
        text: Body text of the article.

    Returns:
        A dict with the keys ``"Fake"`` and ``"Real"`` mapped to the softmax
        probabilities (Python floats) of the first and second logit.
    """
    input_str = "<title>" + title + "<content>" + text + "<end>"
    encoded = tokenizer.encode_plus(
        input_str,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    with torch.no_grad():
        output = model(
            encoded["input_ids"].to(device),
            attention_mask=encoded["attention_mask"].to(device),
        )
    # Explicit dim: normalise over the class axis. Softmax() without `dim`
    # relies on a deprecated implicit choice and warns on every call.
    probabilities = torch.softmax(output.logits, dim=-1)[0]
    return dict(zip(["Fake", "Real"], probabilities.tolist()))
27
+
28
+
29
+
30
+ # Load the model
31
+ # news_model = pickle.load(open("fake_news_predictor_model.pkl", "rb"))
32
+ # vectorizer = pickle.load(open("fakeNews_tfidf_vectorizer.pkl", "rb"))
33
+
34
+ # Function for preprocessing input text
35
+ # def preProcessing(author, title, text):
36
+ # input_corpus = author +" " + title + " " + text
37
+ # input_corpus = re.sub('[^a-zA-Z]', ' ', input_corpus)
38
+ # input_corpus = input_corpus.lower()
39
+ # input_corpus = input_corpus.split()
40
+ # ps = PorterStemmer()
41
+ # input_corpus = [ps.stem(word) for word in input_corpus if not word in set(stopwords.words('english'))]
42
+ # input_corpus = ' '.join(input_corpus)
43
+ # return input_corpus
44
+
45
+ # # Function to convert text into numerical vector using TF-IDF
46
+ # def convertIntoVector(X):
47
+ # # Now converting the textual data into numerical vectors using the initialized TF-IDF vectorizer
48
+ # X = vectorizer.transform(X)
49
+ # return X
50
+
51
# CSS that hides Streamlit's main menu and footer branding.
_HIDE_STREAMLIT_STYLE = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""

# Sidebar "about" text (markdown).
# NOTE(review): this text describes a logistic-regression / TF-IDF model, while
# the prediction below is made by predict_fake(); confirm and update the copy.
# NOTE(review): the contact address reads "[email protected]" in this copy of the
# file; restore the real address if it was redacted.
_SIDEBAR_ABOUT = """

This web app predicts whether a given news article is real or fake using a logistic regression model trained on a dataset containing 20,000 sample news articles with an impressive accuracy of 96%. The app employs TF-IDF vectorization and NLTK library preprocessing techniques, including lowercase conversion, regular expressions, tokenization, stemming, and merging textual data.

Skills Enhanced:

💬 NLP
💻 ML
🐍 Python
📊 Data Analysis
🤖 Transformers
🤗 Hugging face


\nSteps:

1. Data Acquisition: Obtained a dataset of 20,000 news articles from various sources.\n
2. Data Preprocessing: Handled missing values, tokenization, lowercase conversion, stemming, and unified text data.\n
3. Data Visualization: Used Matplotlib for heatmaps, correlation, and confusion matrices.\n
4. Model Creation: Trained a logistic regression model with TF-IDF vectorization for classification.\n
5. Evaluation: Evaluated model performance with accuracy analysis.\n

By leveraging NLP and ML, this app helps identify false information in news articles, aiding in the fight against misinformation and promoting media literacy.

**Credits** 🌟\n
Coder: Aniket Panchal
GitHub: https://github.com/Aniket2021448

**Contact** 📧\n
For any inquiries or feedback, please contact [email protected]

"""

# Other apps offered in the sidebar selector (display name -> URL).
_OTHER_APPS = {
    "Movie-mind": "https://movie-mind.streamlit.app/",
    "Comment-Feel": "https://huggingface.co/spaces/GoodML/Comment-Feel",
}


def main():
    """Render the Streamlit page and run a prediction on form submission.

    Draws the header, the sidebar (description plus a selector linking to
    other apps) and the news form. When the news form is submitted, the title
    and text are passed to ``predict_fake`` and the more probable label is
    shown.
    """
    # Remove Streamlit branding.
    st.markdown(_HIDE_STREAMLIT_STYLE, unsafe_allow_html=True)

    # Progress bar: filled immediately from 0 to 100 (no delay between steps).
    bar = st.progress(0)
    for percent in range(101):
        bar.progress(percent)

    # Page header: globe icon next to the app title.
    icon_col, title_col = st.columns([1, 10])
    with icon_col:
        st.header(" :globe_with_meridians:")
    with title_col:
        st.header("Fake News Prediction App")

    # Initialize NLTK resources (only the commented-out legacy preprocessing
    # in this file references stopwords).
    nltk.download('stopwords')

    # Sidebar: app description and links.
    st.sidebar.title("Find the fake :mag_right:")
    st.sidebar.write("Welcome the NLP based fake news detector :male-detective:")
    st.sidebar.write(_SIDEBAR_ABOUT)
    st.sidebar.write("Feel free to check out my other apps:")

    with st.sidebar.form("app_selection_form"):
        st.write("Feel free to explore my other apps :eyes:")
        selected_app = st.selectbox("Choose an App", list(_OTHER_APPS.keys()))
        submitted_button = st.form_submit_button("Go to App")

    # Handle the app-selection form: emit a meta-refresh pointing at the app.
    if submitted_button:
        selected_app_url = _OTHER_APPS.get(selected_app)
        if selected_app_url:
            st.sidebar.success("Redirected successfully!")
            st.markdown(
                f'<meta http-equiv="refresh" content="0;URL={selected_app_url}">',
                unsafe_allow_html=True,
            )

    st.sidebar.write("In case the apps are down, because of less usage")
    st.sidebar.write("Kindly reach out to me @ [email protected]")

    # News input form.
    with st.form("news_form"):
        st.subheader("Enter News Details")
        title = st.text_input("Title")
        text = st.text_area("Text")
        submit_button = st.form_submit_button("Submit")

    # Process form submission and make the prediction.
    if submit_button:
        prediction = predict_fake(title, text)

        st.subheader(":loudspeaker:Prediction:")
        # predict_fake returns {"Fake": p, "Real": p}; indexing it with 0
        # (as the legacy sklearn code path did) raises KeyError.
        if prediction["Real"] > prediction["Fake"]:
            st.write("This news is predicted to be **real**.:muscle:")
        else:
            st.write("This news is predicted to be **fake**.:shit:")


if __name__ == "__main__":
    main()
174
+
175
+
176
+
177
+