Spaces:
Runtime error
Runtime error
File size: 7,963 Bytes
037d195 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
import numpy as np
import csv
from typing import Optional
from urllib.request import urlopen
import gradio as gr
class SentimentTransform:
    """
    Classify the sentiment of a text with a Hugging Face pipeline.

    The predicted label is turned into a signed score (positive label ->
    ``+score``, anything else -> ``-score``) and, optionally, per-token SHAP
    contributions are attached to the result.

    Parameters
    -------------
    model_name: str
        Model identifier handed to ``transformers.pipeline``.
    highlight: bool
        Default used by ``transform``: when True, per-token SHAP scores are
        computed and returned alongside the sentiment.
    positive_sentiment_name: str
        Label that counts as positive when signing the score.
    max_number_of_shap_documents: Optional[int]
        Default used by ``transform``: keep at most this many SHAP entries
        (``None`` keeps all of them).
    min_abs_score: float
        Default used by ``transform``: SHAP entries whose absolute score is
        not above this value are dropped.
    sensitivity: float
        Threshold on the `neutral` score. A `neutral` prediction is scored as
        (almost) zero only when its score exceeds this value; otherwise the
        signed score of the runner-up label is used. If you are dealing with
        news sources, you probably want less sensitivity.
    **kwargs
        Every extra keyword is stored as an attribute of the same name
        (e.g. ``explainer=...`` injects a ready-made SHAP explainer).
    """

    # Placeholder returned instead of an exact 0 score.
    # NOTE(review): the original comment only says "Adjust to avoid bug";
    # the consumer that cannot handle 0 is not visible in this file.
    _NEAR_ZERO = 1e-5

    # Generic pipeline labels emitted by the default cardiffnlp model.
    _LABEL_MAPPING = {
        "LABEL_0": "negative",
        "LABEL_1": "neutral",
        "LABEL_2": "positive",
    }

    def __init__(
        self,
        model_name: str = "cardiffnlp/twitter-roberta-base-sentiment",
        highlight: bool = False,
        positive_sentiment_name: str = "positive",
        max_number_of_shap_documents: Optional[int] = None,
        min_abs_score: float = 0.1,
        sensitivity: float = 0,
        **kwargs,
    ):
        self.model_name = model_name
        self.highlight = highlight
        self.positive_sentiment_name = positive_sentiment_name
        self.max_number_of_shap_documents = max_number_of_shap_documents
        self.min_abs_score = min_abs_score
        self.sensitivity = sensitivity
        for k, v in kwargs.items():
            setattr(self, k, v)

    def preprocess(self, text: str):
        """
        Mask user mentions and links in `text`.

        Space-separated tokens starting with ``@`` (and longer than one
        character) become ``@user``; tokens starting with ``http`` become
        ``http``. All other tokens are kept unchanged.
        """
        new_text = []
        for t in text.split(" "):
            t = "@user" if t.startswith("@") and len(t) > 1 else t
            t = "http" if t.startswith("http") else t
            new_text.append(t)
        return " ".join(new_text)

    @property
    def classifier(self):
        """Pipeline for ``self.model_name``; built on first access, then reused."""
        if not hasattr(self, "_classifier"):
            # Imported lazily so the class can be created without transformers.
            import transformers

            self._classifier = transformers.pipeline(
                return_all_scores=True,
                model=self.model_name,
            )
        return self._classifier

    def _get_label_mapping(self, task: str):
        """
        Download the label names of a TweetEval `task`.

        Reads the tab-separated ``mapping.txt`` of the task from the
        cardiffnlp/tweeteval GitHub repository and returns the second column
        of every row that has one.
        """
        # Note: this is specific to the current model
        mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
        with urlopen(mapping_link) as f:
            html = f.read().decode("utf-8").split("\n")
        csvreader = csv.reader(html, delimiter="\t")
        return [row[1] for row in csvreader if len(row) > 1]

    @property
    def label_mapping(self):
        """Map generic ``LABEL_n`` names to readable sentiment names."""
        # Return a copy so callers cannot mutate the shared class constant.
        return dict(self._LABEL_MAPPING)

    def analyze_sentiment(
        self,
        text,
        highlight: bool = False,
        positive_sentiment_name: str = "positive",
        max_number_of_shap_documents: Optional[int] = None,
        min_abs_score: float = 0.1,
    ):
        """
        Classify `text` and compute its signed sentiment score.

        Parameters
        -------------
        text
            Text to classify; converted with ``str``. ``None`` returns ``None``.
        highlight: bool
            When True, also compute per-token SHAP scores.
        positive_sentiment_name: str
            Accepted for backward compatibility but not used: the sign is
            always decided by ``self.positive_sentiment_name``.
        max_number_of_shap_documents: Optional[int]
            Forwarded to ``get_shap_values``.
        min_abs_score: float
            Forwarded to ``get_shap_values``.

        Returns
        -------------
        ``None`` when `text` is ``None``. Otherwise a dict with ``sentiment``
        and ``overall_sentiment_score``; with `highlight` it additionally
        holds ``score``, ``overall_sentiment`` and ``highlight_chunk_``.
        """
        if text is None:
            return None

        # One input text -> one list holding a {"label", "score"} dict per class.
        scores = self.classifier([str(text)], truncation=True, max_length=512)[0]
        ind_max = int(np.argmax([entry["score"] for entry in scores]))
        max_score = scores[ind_max]["score"]
        mapping = self.label_mapping
        sentiment = mapping.get(scores[ind_max]["label"], scores[ind_max]["label"])

        if sentiment.lower() != "neutral":
            overall_sentiment = self._calculate_overall_sentiment(max_score, sentiment)
        elif max_score > self.sensitivity:
            # Confident enough about neutral: report a (near) zero score.
            overall_sentiment = self._NEAR_ZERO
        else:
            # Not confident enough about neutral: score by the runner-up label.
            # The reported `sentiment` label itself stays "neutral".
            runners_up = scores[:ind_max] + scores[ind_max + 1:]
            best = runners_up[int(np.argmax([entry["score"] for entry in runners_up]))]
            overall_sentiment = self._calculate_overall_sentiment(
                best["score"], mapping.get(best["label"], best["label"])
            )

        # Adjust to avoid bug (see _NEAR_ZERO).
        if overall_sentiment == 0:
            overall_sentiment = self._NEAR_ZERO

        if not highlight:
            return {
                "sentiment": sentiment,
                "overall_sentiment_score": overall_sentiment,
            }

        shap_documents = self.get_shap_values(
            text,
            sentiment_ind=ind_max,
            max_number_of_shap_documents=max_number_of_shap_documents,
            min_abs_score=min_abs_score,
        )
        return {
            "sentiment": sentiment,
            "score": max_score,
            "overall_sentiment": overall_sentiment,
            # Same value under the key used by the non-highlight result, so
            # callers can read one key regardless of `highlight`.
            "overall_sentiment_score": overall_sentiment,
            "highlight_chunk_": shap_documents,
        }

    def _calculate_overall_sentiment(self, score: float, sentiment: str):
        """Return `score` for the positive label and ``-score`` for any other."""
        # Normalise both sides so a configured name such as "Positive" matches.
        positive_name = self.positive_sentiment_name.lower().strip()
        if sentiment.lower().strip() == positive_name:
            return score
        return -score

    @property
    def explainer(self):
        """
        SHAP explainer wrapping ``self.classifier``; built on first access.

        Raises
        -------------
        ModuleNotFoundError
            If the optional ``shap`` package is not installed.
        """
        if not hasattr(self, "_explainer"):
            try:
                import shap
            except ModuleNotFoundError as err:
                raise ModuleNotFoundError(
                    "The `shap` package is required to highlight text."
                ) from err
            self._explainer = shap.Explainer(self.classifier)
        return self._explainer

    @explainer.setter
    def explainer(self, value):
        # Keeps `SentimentTransform(explainer=...)` and direct assignment working.
        self._explainer = value

    def get_shap_values(
        self,
        text: str,
        sentiment_ind: int = 2,
        max_number_of_shap_documents: Optional[int] = None,
        min_abs_score: float = 0.1,
    ):
        """
        Get SHAP values.

        Parameters
        -------------
        text: str
            Text to explain.
        sentiment_ind: int
            Index of the class whose contributions are reported.
        max_number_of_shap_documents: Optional[int]
            Keep only this many highest-scoring entries; ``None`` keeps all.
        min_abs_score: float
            Entries whose absolute score is not above this value are dropped.

        Returns
        -------------
        List of ``{"text": token, "score": value}`` dicts sorted by score in
        descending order. The limit is applied before the ``min_abs_score``
        filter, so fewer than the limit may be returned.
        """
        shap_values = self.explainer([text])
        # Only one text is explained, so read the first (and only) entry.
        # assumes each row of `.values[0]` holds one value per class and lines
        # up with `.feature_names[0]` - TODO confirm against the shap version in use
        per_token = np.asarray(shap_values.values[0]).tolist()
        tokens = shap_values.feature_names[0]
        shap_docs = [
            {"text": token, "score": row[sentiment_ind]}
            for row, token in zip(per_token, tokens)
        ]
        sorted_scores = sorted(shap_docs, key=lambda d: d["score"], reverse=True)
        if max_number_of_shap_documents is not None:
            sorted_scores = sorted_scores[:max_number_of_shap_documents]
        return [d for d in sorted_scores if abs(d["score"]) > min_abs_score]

    def transform(self, text):
        """Run ``analyze_sentiment`` on `text` with the instance's settings."""
        return self.analyze_sentiment(
            text,
            highlight=self.highlight,
            max_number_of_shap_documents=self.max_number_of_shap_documents,
            min_abs_score=self.min_abs_score,
        )
# Hugging Face model behind each choice of the UI dropdown.
_MODEL_NAMES = {
    "Social Media Model": "cardiffnlp/twitter-roberta-base-sentiment",
    "Survey Model": "j-hartmann/sentiment-roberta-large-english-3-classes",
}
# Used for any model type that is not listed above.
_DEFAULT_MODEL_NAME = "j-hartmann/sentiment-roberta-large-english-3-classes"
# Loaded pipelines keyed by model name, so a model is loaded once per process
# instead of once per request. Each cached model stays in memory.
_PIPELINE_CACHE = {}


def sentiment_classifier(text, model_type, sensitivity):
    """
    Predict the sentiment of `text` for the Gradio interface.

    Parameters
    -------------
    text
        Text to classify.
    model_type
        Dropdown choice; unknown values fall back to the survey model.
    sensitivity
        Neutral threshold forwarded to ``SentimentTransform``.

    Returns
    -------------
    Tuple ``(sentiment, overall_sentiment_score)``; ``(None, None)`` when
    the transform produced no result (i.e. `text` is ``None``).
    """
    model_name = _MODEL_NAMES.get(model_type, _DEFAULT_MODEL_NAME)
    # A fresh transform per call keeps `sensitivity` request-local; only the
    # expensive pipeline object is shared between calls.
    model = SentimentTransform(model_name=model_name, sensitivity=sensitivity)
    if model_name in _PIPELINE_CACHE:
        model._classifier = _PIPELINE_CACHE[model_name]
    else:
        _PIPELINE_CACHE[model_name] = model.classifier
    res_dict = model.transform(text)
    if res_dict is None:
        return None, None
    return res_dict["sentiment"], res_dict["overall_sentiment_score"]
# Gradio UI: text, model choice and neutral sensitivity go in; the predicted
# label and its signed score come out.
demo = gr.Interface(
    fn=sentiment_classifier,
    inputs=[
        gr.Textbox(
            placeholder="Put the text here and click 'submit' to predict its sentiment",
            label="Input Text",
        ),
        gr.Dropdown(
            ["Social Media Model", "Survey Model"],
            value="Survey Model",
            label="Select the Model that you want to use.",
        ),
        gr.Slider(
            0,
            1,
            step=0.01,
            label="Sensitivity (How confident it is about being `neutral`. If you are dealing with news sources, you probably want less sensitivity.)",
        ),
    ],
    outputs=[gr.Textbox(label="Sentiment"), gr.Textbox(label="Sentiment Score")],
)
# Starts the web server as soon as the file is executed.
demo.launch(debug=True)