Spaces: Build error
pouchedfox committed · 25d443b · 1 Parent(s): 5047d71

Upload app.py

app.py ADDED
@@ -0,0 +1,201 @@
import numpy as np
import csv
from typing import Optional
from urllib.request import urlopen
import gradio as gr


class SentimentTransform:
    def __init__(
        self,
        model_name: str = "cardiffnlp/twitter-roberta-base-sentiment",
        highlight: bool = False,
        positive_sentiment_name: str = "positive",
        max_number_of_shap_documents: Optional[int] = None,
        min_abs_score: float = 0.1,
        sensitivity: float = 0,
        **kwargs,
    ):
        """
        Sentiment Ops.

        Parameters
        ----------
        model_name: str
            The name of the Hugging Face model to load.
        sensitivity: float
            Confidence threshold for keeping a `neutral` prediction. A neutral
            score above this stays neutral; at or below it, the next most
            likely label decides the sign. If you are dealing with news
            sources, you probably want less sensitivity.
        """
        self.model_name = model_name
        self.highlight = highlight
        self.positive_sentiment_name = positive_sentiment_name
        self.max_number_of_shap_documents = max_number_of_shap_documents
        self.min_abs_score = min_abs_score
        self.sensitivity = sensitivity
        for k, v in kwargs.items():
            setattr(self, k, v)

    def preprocess(self, text: str):
        # Mask user handles and URLs the way the tweeteval models were trained.
        new_text = []
        for t in text.split(" "):
            t = "@user" if t.startswith("@") and len(t) > 1 else t
            t = "http" if t.startswith("http") else t
            new_text.append(t)
        return " ".join(new_text)
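
    # For example: preprocess("@bob check https://x.co") -> "@user check http".
    # (Note: nothing in this file currently calls preprocess; the classifier
    # receives the raw text in analyze_sentiment.)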

    @property
    def classifier(self):
        # Lazily build and cache the text-classification pipeline on first use.
        if not hasattr(self, "_classifier"):
            import transformers

            self._classifier = transformers.pipeline(
                return_all_scores=True,
                model=self.model_name,
            )
        return self._classifier
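
    # `return_all_scores=True` is deprecated on recent transformers releases; a
    # sketch of the modern equivalent (assuming a transformers version that
    # supports `top_k`) would be:
    #
    #     self._classifier = transformers.pipeline(
    #         "text-classification",
    #         model=self.model_name,
    #         top_k=None,  # return every label's score, like return_all_scores=True
    #     )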

    def _get_label_mapping(self, task: str):
        # Fetch the label names for a tweeteval task (e.g. "sentiment").
        # Note: this is specific to the cardiffnlp/tweeteval models and is
        # currently unused; `label_mapping` below hardcodes the same mapping.
        mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
        with urlopen(mapping_link) as f:
            html = f.read().decode("utf-8").split("\n")
        csvreader = csv.reader(html, delimiter="\t")
        labels = [row[1] for row in csvreader if len(row) > 1]
        return labels

    @property
    def label_mapping(self):
        return {"LABEL_0": "negative", "LABEL_1": "neutral", "LABEL_2": "positive"}
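
    # Shape of `self.classifier([text])` output for one input, assuming the
    # cardiffnlp three-class model (scores illustrative):
    #
    #     [[{"label": "LABEL_0", "score": 0.02},
    #       {"label": "LABEL_1", "score": 0.18},
    #       {"label": "LABEL_2", "score": 0.80}]]
    #
    # `label_mapping` then renames LABEL_2 to "positive", and so on.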

    def analyze_sentiment(
        self,
        text,
        highlight: bool = False,
        positive_sentiment_name: str = "positive",
        max_number_of_shap_documents: Optional[int] = None,
        min_abs_score: float = 0.1,
    ):
        if text is None:
            return None
        labels = self.classifier([str(text)], truncation=True, max_length=512)
        ind_max = np.argmax([l["score"] for l in labels[0]])
        sentiment = labels[0][ind_max]["label"]
        max_score = labels[0][ind_max]["score"]
        sentiment = self.label_mapping.get(sentiment, sentiment)
        if sentiment.lower() == "neutral" and max_score > self.sensitivity:
            # Confidently neutral: report an (almost) zero overall score.
            overall_sentiment = 1e-5
        elif sentiment.lower() == "neutral":
            # Weakly neutral: let the next highest-scoring label set the sign.
            new_labels = labels[0][:ind_max] + labels[0][(ind_max + 1):]
            new_ind_max = np.argmax([l["score"] for l in new_labels])
            new_max_score = new_labels[new_ind_max]["score"]
            new_sentiment = new_labels[new_ind_max]["label"]
            new_sentiment = self.label_mapping.get(new_sentiment, new_sentiment)
            overall_sentiment = self._calculate_overall_sentiment(
                new_max_score, new_sentiment
            )
        else:
            overall_sentiment = self._calculate_overall_sentiment(max_score, sentiment)
        # Avoid returning an exactly-zero score.
        if overall_sentiment == 0:
            overall_sentiment = 1e-5
        if not highlight:
            return {
                "sentiment": sentiment,
                "overall_sentiment_score": overall_sentiment,
            }
        shap_documents = self.get_shap_values(
            text,
            sentiment_ind=ind_max,
            max_number_of_shap_documents=max_number_of_shap_documents,
            min_abs_score=min_abs_score,
        )
        return {
            "sentiment": sentiment,
            "score": max_score,
            "overall_sentiment_score": overall_sentiment,
            "highlight_chunk_": shap_documents,
        }
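
    # Decision sketch for the neutral branch (scores illustrative):
    #   sensitivity=0.5, scores = {negative: 0.10, neutral: 0.55, positive: 0.35}
    #     -> neutral wins and 0.55 > 0.5, so overall_sentiment_score = 1e-5
    #   sensitivity=0.7, same scores
    #     -> 0.55 <= 0.7, so the next-best label ("positive", 0.35) sets the
    #        sign: overall_sentiment_score = 0.35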

    def _calculate_overall_sentiment(self, score: float, sentiment: str):
        # Signed score: the positive label -> +score, anything else -> -score.
        if sentiment.lower().strip() == self.positive_sentiment_name:
            return score
        else:
            return -score
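
    # For example: _calculate_overall_sentiment(0.9, "positive") -> 0.9, while
    # _calculate_overall_sentiment(0.8, "negative") -> -0.8.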

    # The SHAP explainer below is commented out; `get_shap_values` (and thus
    # highlight mode) will raise AttributeError until it is restored as a
    # property, the `shap` package is installed, and `MissingPackageError`
    # (undefined in this file) is replaced or imported.
    # @property
    # def explainer(self):
    #     if hasattr(self, "_explainer"):
    #         return self._explainer
    #     else:
    #         try:
    #             import shap
    #         except ModuleNotFoundError:
    #             raise MissingPackageError("shap")
    #         self._explainer = shap.Explainer(self.classifier)
    #         return self._explainer

    def get_shap_values(
        self,
        text: str,
        sentiment_ind: int = 2,
        max_number_of_shap_documents: Optional[int] = None,
        min_abs_score: float = 0.1,
    ):
        """Get per-token SHAP values (requires the explainer property above)."""
        shap_values = self.explainer([text])
        cohorts = {"": shap_values}
        cohort_labels = list(cohorts.keys())
        cohort_exps = list(cohorts.values())
        features = cohort_exps[0].data
        feature_names = cohort_exps[0].feature_names
        values = np.array([cohort_exps[i].values for i in range(len(cohort_exps))])
        shap_docs = [
            {"text": v, "score": f}
            for f, v in zip(
                [x[sentiment_ind] for x in values[0][0].tolist()], feature_names[0]
            )
        ]
        sorted_scores = sorted(shap_docs, key=lambda x: x["score"], reverse=True)
        # Cap the list only when a maximum was actually given.
        if max_number_of_shap_documents is not None:
            sorted_scores = sorted_scores[:max_number_of_shap_documents]
        return [d for d in sorted_scores if abs(d["score"]) > min_abs_score]

    def transform(self, text):
        sentiment = self.analyze_sentiment(
            text,
            highlight=self.highlight,
            max_number_of_shap_documents=self.max_number_of_shap_documents,
            min_abs_score=self.min_abs_score,
        )
        return sentiment


def sentiment_classifier(text, model_type, sensitivity):
    if model_type == 'Social Media Model':
        model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    else:
        # "Survey Model" is also the fallback for any other value.
        model_name = "j-hartmann/sentiment-roberta-large-english-3-classes"
    model = SentimentTransform(model_name=model_name, sensitivity=sensitivity)
    res_dict = model.transform(text)
    return res_dict['sentiment'], res_dict['overall_sentiment_score']
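
# Example call, mirroring what the Gradio interface does (score will vary by
# model and input):
#
#     sentiment_classifier("Great support team!", "Survey Model", 0.3)
#     -> ("positive", 0.98...)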


demo = gr.Interface(
    fn=sentiment_classifier,
    inputs=[
        gr.Textbox(
            placeholder="Put the text here and click 'submit' to predict its sentiment",
            label="Input Text",
        ),
        gr.Dropdown(
            ["Social Media Model", "Survey Model"],
            value="Survey Model",
            label="Select the model that you want to use.",
        ),
        gr.Slider(
            0,
            1,
            step=0.01,
            label=(
                "Sensitivity (how confident the model must be to report `neutral`; "
                "for news sources you probably want less sensitivity)"
            ),
        ),
    ],
    outputs=[gr.Textbox(label='Sentiment'), gr.Textbox(label='Sentiment Score')],
)
demo.launch(debug=True)
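
The file imports numpy and gradio at the top and loads transformers lazily inside `classifier`, so a Space running it needs those packages declared. A hypothetical minimal requirements.txt, assuming the Gradio SDK provides gradio itself and that PyTorch backs the pipeline:

    numpy
    transformers
    torch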