|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import plotly.express as pe |
|
from wordcloud import WordCloud, STOPWORDS |
|
from nltk import FreqDist |
|
|
|
# --- Page setup --------------------------------------------------------------
# Set the 'deprecation.showPyplotGlobalUse' option to False; presumably this
# hides Streamlit's warning about calling st.pyplot() without a figure.
# NOTE(review): newer Streamlit releases may no longer recognise this option —
# confirm against the Streamlit version this app is pinned to.
st.set_option(
    'deprecation.showPyplotGlobalUse',
    False,
)

# Heading and one-line introduction shown at the top of the page.
PAGE_TITLE = "Welcome To The Explore Page: "
PAGE_INTRO = "On this page you will be able to some EDA Visuals"

st.title(PAGE_TITLE)
st.markdown(PAGE_INTRO)
|
|
|
|
|
|
|
# --- Data loading ------------------------------------------------------------
# Contents of datasets/Train.csv, loaded without any further processing.
data = pd.read_csv("datasets/Train.csv")

# Contents of datasets/clean_copy.csv; any row containing a missing value is
# discarded immediately after loading.
clean_data = pd.read_csv("datasets/clean_copy.csv").dropna()
|
|
|
|
|
|
|
# --- Word cloud of the uncleaned tweets --------------------------------------
# Merge every tweet in the "safe_text" column into one string. Missing cells
# are dropped and the rest coerced to str so that a NaN value cannot make
# str.join() raise TypeError.
unclean_words = " ".join(data["safe_text"].dropna().astype(str))

# Build the cloud, leaving out the words in wordcloud's STOPWORDS set.
wc = WordCloud(stopwords=STOPWORDS).generate(unclean_words)

# Draw on an explicit Figure/Axes and hand that figure to Streamlit instead of
# relying on pyplot's implicit "current figure" (the no-argument st.pyplot()
# form is deprecated by Streamlit).
wc_fig, wc_ax = plt.subplots(figsize=(5, 10))
wc_ax.set_title("Most common Words in unclean Dataset")
wc_ax.imshow(wc)
st.pyplot(wc_fig)

# pyplot keeps each figure it creates registered until it is closed; close it
# once rendered so figures do not pile up when the script is run repeatedly.
plt.close(wc_fig)
|
|
|
|
|
# --- Most frequent words after cleaning --------------------------------------
# Concatenate the "clean_tweet" column into one string, then break it into
# whitespace-separated tokens.
joined_clean_text = ' '.join(clean_data["clean_tweet"])
clean_words = joined_clean_text.split()

# Tally the tokens and keep the 20 most common as (word, count) rows.
top_twenty = FreqDist(clean_words).most_common(20)
freq_words = pd.DataFrame(top_twenty, columns=["word", "count"])

# Treemap with one tile per word, sized by that word's count.
fig = pe.treemap(
    data_frame=freq_words,
    path=["word"],
    values="count",
    title="Top 20 Most Frequent Words After Cleaning",
)
st.plotly_chart(fig)
|
|
|
|
|
|
|
# --- Distribution of tweet lengths -------------------------------------------
# Length of each tweet measured as the number of pieces produced by splitting
# on a single space (same rule as str.split(" ")). The vectorised .str
# accessor yields NaN for a missing tweet instead of raising AttributeError.
data["tweet_length"] = data["safe_text"].str.split(" ").str.len()

# How many tweets have each length. The output columns are named explicitly:
# a bare value_counts().reset_index() produces "tweet_length"/"count" only on
# pandas >= 2.0 (older versions give "index"/"tweet_length"), which would
# break the x=/y= column references below.
words = (
    data["tweet_length"]
    .value_counts()
    .rename_axis("tweet_length")
    .reset_index(name="count")
)

# One marker per distinct length: marker size follows the number of tweets,
# marker colour follows the length itself.
# NOTE(review): the title spelling ("Lenghts") is kept exactly as it was;
# correct it separately if that is not intentional.
fig_2 = pe.scatter(
    data_frame=words,
    x="tweet_length",
    y="count",
    size="count",
    color="tweet_length",
    title="Tweet Lenghts",
)
st.plotly_chart(fig_2)