BulatF committed on
Commit
a8cc4f6
·
1 Parent(s): 4349862

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +53 -4
  2. requirements.txt +2 -0
app.py CHANGED
@@ -6,6 +6,10 @@ import torch
6
  import io
7
  import base64
8
  from stqdm import stqdm
 
 
 
 
9
 
10
  # Define the model and tokenizer
11
  model_name = 'nlptown/bert-base-multilingual-uncased-sentiment'
@@ -42,6 +46,11 @@ def main():
42
  if file is not None:
43
  try:
44
  df = pd.read_excel(file)
 
 
 
 
 
45
  review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
46
  df[review_column] = df[review_column].astype(str)
47
  except Exception as e:
@@ -51,15 +60,21 @@ def main():
51
  start_button = st.button('Start Analysis')
52
 
53
  if start_button and df is not None:
 
 
 
 
54
  if review_column in df.columns:
55
  with st.spinner('Performing sentiment analysis...'):
56
  df, df_display = process_reviews(df, review_column)
57
 
58
- display_ratings(df)
59
  display_dataframe(df, df_display)
60
  else:
61
  st.write(f'No column named "{review_column}" found in the uploaded file.')
62
-
 
 
63
 
64
  def process_reviews(df, review_column):
65
  with st.spinner('Classifying reviews...'):
@@ -93,6 +108,23 @@ def process_reviews(df, review_column):
93
 
94
  return df_new, df_display
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  def scores_to_df(df):
98
  for i in range(1, 6):
@@ -132,13 +164,30 @@ def display_dataframe(df, df_display):
132
 
133
  st.dataframe(df_display)
134
 
135
def display_ratings(df):
    """Render the number of reviews per star rating in five side-by-side columns.

    For each rating value 1 through 5, the matching column shows the count of
    rows whose 'Rating' equals that value as a level-3 heading, followed by
    that many star emojis.

    Args:
        df: DataFrame containing a 'Rating' column.
    """
    star_columns = st.columns(5)
    # Pair each rating value with the column that displays it.
    for stars, column in zip(range(1, 6), star_columns):
        matching_rows = df[df['Rating'] == stars]
        column.markdown(f"### {matching_rows.shape[0]}")
        column.markdown(f"{'⭐' * stars}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
 
144
 
 
6
  import io
7
  import base64
8
  from stqdm import stqdm
9
+ from wordcloud import WordCloud
10
+ import matplotlib.pyplot as plt
11
+ import numpy as np
12
+
13
 
14
  # Define the model and tokenizer
15
  model_name = 'nlptown/bert-base-multilingual-uncased-sentiment'
 
46
  if file is not None:
47
  try:
48
  df = pd.read_excel(file)
49
+ # Drop rows where all columns are NaN
50
+ df = df.dropna(how='all')
51
+ # Replace blank spaces with NaN, then drop rows where all columns are NaN again
52
+ df = df.replace(r'^\s*$', np.nan, regex=True)
53
+ df = df.dropna(how='all')
54
  review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
55
  df[review_column] = df[review_column].astype(str)
56
  except Exception as e:
 
60
  start_button = st.button('Start Analysis')
61
 
62
  if start_button and df is not None:
63
+ # Drop rows with NaN or blank values in the review_column
64
+ df = df[df[review_column].notna()]
65
+ df = df[df[review_column].str.strip() != '']
66
+
67
  if review_column in df.columns:
68
  with st.spinner('Performing sentiment analysis...'):
69
  df, df_display = process_reviews(df, review_column)
70
 
71
+ display_ratings(df, review_column) # updated this line
72
  display_dataframe(df, df_display)
73
  else:
74
  st.write(f'No column named "{review_column}" found in the uploaded file.')
75
+
76
+
77
+
78
 
79
  def process_reviews(df, review_column):
80
  with st.spinner('Classifying reviews...'):
 
108
 
109
  return df_new, df_display
110
 
111
def generate_wordclouds(df, review_column):
    """Render one word cloud per rating category (1 through 5).

    Ratings with no usable review text are skipped instead of raising, so a
    single empty category does not abort the whole page.

    Args:
        df: DataFrame containing a 'Rating' column and the review text column.
        review_column: Name of the column whose text is turned into word clouds.
    """
    st.markdown("# Word Clouds for each rating category")
    for i in range(1, 6):
        # Create a sub-dataframe for each rating category
        sub_df = df[df['Rating'] == i]
        # Join all the reviews; str() keeps the join safe for non-string cells
        text = ' '.join(str(review) for review in sub_df[review_column])
        if not text.strip():
            # Nothing to plot for this rating
            continue

        try:
            wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white").generate(text)
        except ValueError:
            # WordCloud raises ValueError when no words remain after its own
            # filtering (e.g. the text consists only of stopwords).
            continue

        # Draw on an explicit figure rather than pyplot's global state, and
        # hand that figure object to Streamlit.
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        ax.set_title(f"Rating {i}")
        st.pyplot(fig)
        plt.close(fig)
127
+
128
 
129
  def scores_to_df(df):
130
  for i in range(1, 6):
 
164
 
165
  st.dataframe(df_display)
166
 
167
def display_ratings(df, review_column):
    """Show the review count, star label and word cloud for each rating 1-5.

    Five columns are laid out side by side; column ``i`` shows the number of
    rows whose 'Rating' equals ``i``, then ``i`` star emojis, then a word
    cloud built from that rating's review text (when there is any usable text).

    Args:
        df: DataFrame containing a 'Rating' column and the review text column.
        review_column: Name of the column whose text feeds the word clouds.
    """
    cols = st.columns(5)

    for i in range(1, 6):
        col = cols[i - 1]
        # Filter once and reuse the result for both the count and the text.
        sub_df = df[df['Rating'] == i]
        col.markdown(f"### {sub_df.shape[0]}")
        col.markdown(f"{'⭐' * i}")

        # str() keeps the join safe if the column holds non-string cells.
        text = ' '.join(str(review) for review in sub_df[review_column])
        if not text.strip():  # Only generate a word cloud if text is not empty
            continue

        try:
            wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white").generate(text)
        except ValueError:
            # WordCloud raises ValueError when no words remain after its own
            # filtering (e.g. the text consists only of stopwords).
            continue

        # Draw on an explicit figure rather than pyplot's global state, and
        # hand that figure object to Streamlit.
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        ax.set_title(f"Rating {i}")
        col.pyplot(fig)
        plt.close(fig)
189
+
190
+
191
 
192
 
193
 
requirements.txt CHANGED
@@ -4,3 +4,5 @@ transformers
4
  torch
5
  stqdm
6
  openpyxl
 
 
 
4
  torch
5
  stqdm
6
  openpyxl
7
+ wordcloud
8
+ matplotlib