BulatF committed on
Commit
768bcdc
·
1 Parent(s): 2fd93e8

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -20
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import streamlit as st
2
  import pandas as pd
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 
4
  import torch.nn.functional as F
5
  import torch
6
  import io
@@ -18,9 +20,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
18
  st.set_page_config(layout="wide")
19
 
20
  # Import the new model and tokenizer
21
- class_model_name = 'facebook/bart-large-mnli'
22
- class_model = AutoModelForSequenceClassification.from_pretrained(class_model_name)
23
- class_tokenizer = AutoTokenizer.from_pretrained(class_model_name)
24
 
25
 
26
  #defs
@@ -43,13 +44,20 @@ def get_table_download_link(df):
43
 
44
 
45
  # Function for classifying with the new model
46
- def classify_with_new_classes(reviews, class_name):
47
- inputs = class_tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
48
- outputs = class_model(**inputs)
49
- probabilities = F.softmax(outputs.logits, dim=1).tolist()
50
- class_scores = [prob[1] for prob in probabilities] # Assuming binary classification
 
 
 
 
 
51
  return class_scores
52
 
 
 
53
  def main():
54
  st.title('Sentiment Analysis')
55
  st.markdown('Upload an Excel file to get sentiment analytics')
@@ -77,19 +85,21 @@ def main():
77
 
78
  start_button = st.button('Start Analysis')
79
 
 
80
  if start_button and df is not None:
81
  # Drop rows with NaN or blank values in the review_column
82
  df = df[df[review_column].notna()]
83
  df = df[df[review_column].str.strip() != '']
84
-
85
  class_names = [name.strip() for name in class_names.split(',')] # Split class names into a list
86
  for name in class_names: # Add a new column for each class name
87
- df[name] = 0.0
88
-
 
89
  if review_column in df.columns:
90
  with st.spinner('Performing sentiment analysis...'):
91
  df, df_display = process_reviews(df, review_column, class_names)
92
-
93
  display_ratings(df, review_column) # updated this line
94
  display_dataframe(df, df_display)
95
  else:
@@ -114,13 +124,14 @@ def process_reviews(df, review_column, class_names):
114
  raw_scores.extend(batch_scores)
115
  review_counter += len(batch_reviews)
116
  progress_bar.progress(review_counter / total_reviews)
117
-
 
 
 
118
  class_scores_dict = {} # New dictionary to store class scores
119
- for name in class_names:
120
- with st.spinner(f'Generating classes for {name}...'):
121
- class_scores = classify_with_new_classes(df[review_column].tolist(), name)
122
- df[name] = class_scores
123
- class_scores_dict[name] = class_scores # Store class scores in the dictionary
124
 
125
  # Add a new column with the class that has the highest score
126
  df['Highest Class'] = df[class_names].idxmax(axis=1)
@@ -131,7 +142,7 @@ def process_reviews(df, review_column, class_names):
131
  df_display = scores_to_percent(df_new.copy())
132
 
133
  # Get all columns excluding the created ones and the review_column
134
- remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class']]
135
 
136
  # Reorder the dataframe with selected columns first, created columns next, then the remaining columns
137
  df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + class_names + ['Highest Class'] + remaining_columns]
@@ -144,7 +155,6 @@ def process_reviews(df, review_column, class_names):
144
 
145
 
146
 
147
-
148
  def scores_to_df(df):
149
  for i in range(1, 6):
150
  df[f'{i} Star'] = df['raw_scores'].apply(lambda scores: scores[i-1]).round(2)
 
1
  import streamlit as st
2
  import pandas as pd
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ from transformers import pipeline
5
+
6
  import torch.nn.functional as F
7
  import torch
8
  import io
 
20
  st.set_page_config(layout="wide")
21
 
22
  # Import the new model and tokenizer
23
+
24
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 
25
 
26
 
27
  #defs
 
44
 
45
 
46
  # Function for classifying with the new model
47
+ def classify_with_new_classes(reviews, class_names):
48
+ class_scores = []
49
+
50
+ for review in reviews:
51
+ result = classifier(review, class_names)
52
+ scores_dict = dict(zip(result['labels'], result['scores']))
53
+ # Reorder scores to match the original class_names order
54
+ scores = [scores_dict[name] for name in class_names]
55
+ class_scores.append(scores)
56
+
57
  return class_scores
58
 
59
+
60
+
61
  def main():
62
  st.title('Sentiment Analysis')
63
  st.markdown('Upload an Excel file to get sentiment analytics')
 
85
 
86
  start_button = st.button('Start Analysis')
87
 
88
+
89
  if start_button and df is not None:
90
  # Drop rows with NaN or blank values in the review_column
91
  df = df[df[review_column].notna()]
92
  df = df[df[review_column].str.strip() != '']
93
+
94
  class_names = [name.strip() for name in class_names.split(',')] # Split class names into a list
95
  for name in class_names: # Add a new column for each class name
96
+ if name not in df.columns:
97
+ df[name] = 0.0
98
+
99
  if review_column in df.columns:
100
  with st.spinner('Performing sentiment analysis...'):
101
  df, df_display = process_reviews(df, review_column, class_names)
102
+
103
  display_ratings(df, review_column) # updated this line
104
  display_dataframe(df, df_display)
105
  else:
 
124
  raw_scores.extend(batch_scores)
125
  review_counter += len(batch_reviews)
126
  progress_bar.progress(review_counter / total_reviews)
127
+
128
+ with st.spinner('Generating classes...'):
129
+ class_scores = classify_with_new_classes(df[review_column].tolist(), class_names)
130
+
131
  class_scores_dict = {} # New dictionary to store class scores
132
+ for i, name in enumerate(class_names):
133
+ df[name] = [score[i] for score in class_scores]
134
+ class_scores_dict[name] = [score[i] for score in class_scores]
 
 
135
 
136
  # Add a new column with the class that has the highest score
137
  df['Highest Class'] = df[class_names].idxmax(axis=1)
 
142
  df_display = scores_to_percent(df_new.copy())
143
 
144
  # Get all columns excluding the created ones and the review_column
145
+ remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class'] + class_names]
146
 
147
  # Reorder the dataframe with selected columns first, created columns next, then the remaining columns
148
  df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + class_names + ['Highest Class'] + remaining_columns]
 
155
 
156
 
157
 
 
158
  def scores_to_df(df):
159
  for i in range(1, 6):
160
  df[f'{i} Star'] = df['raw_scores'].apply(lambda scores: scores[i-1]).round(2)