import gradio as gr
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
from scipy.special import softmax
import os
# Model loaders
def load_distilbert():
    """Load the fine-tuned DistilBERT model and its tokenizer from the Hugging Face Hub."""
    model_path = "bright1/fine-tuned-distilbert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # config = AutoConfig.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return model, tokenizer
def load_roberta():
    """Load the fine-tuned Twitter-RoBERTa model and its tokenizer from the Hugging Face Hub."""
    model_path = "bright1/fine-tuned-twitter-Roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # config = AutoConfig.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return model, tokenizer
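# Optional optimization (sketch only, not wired in): both loaders re-download and
# re-initialize the model on every request. Caching them once per process would
# avoid that, e.g.:
#   from functools import lru_cache
#   load_distilbert = lru_cache(maxsize=1)(load_distilbert)
#   load_roberta = lru_cache(maxsize=1)(load_roberta)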
# def check_csv(csv_file, data):
#     if os.path.isfile(csv_file):
#         data.to_csv(csv_file, mode='a', header=False, index=False, encoding='utf-8')
#     else:
#         history = data.copy()
#         history.to_csv(csv_file, index=False)
# Preprocess text: mask user handles and URLs with placeholder tokens
def preprocess(text):
    new_text = []
    for t in text.split(" "):
        t = "@user" if t.startswith("@") and len(t) > 1 else t
        t = "http" if t.startswith("http") else t
        new_text.append(t)
    return " ".join(new_text)
# Process the input and return prediction scores
def sentiment_analysis(model_type, text):
    if model_type == 'distilbert':
        model, tokenizer = load_distilbert()
    else:
        model, tokenizer = load_roberta()
    save_text = {'tweet': text}  # kept for the commented-out history-saving logic below
    text = preprocess(text)
    encoded_input = tokenizer(text, return_tensors="pt")  # PyTorch tensors
    output = model(**encoded_input)
    scores_ = output[0][0].detach().numpy()
    scores_ = softmax(scores_)
    # Format output as a dict of label -> probability
    labels = ["Negative", "Neutral", "Positive"]
    scores = {l: float(s) for (l, s) in zip(labels, scores_)}
    # save_text.update(scores)
    # user_data = {key: [value] for key, value in save_text.items()}
    # data = pd.DataFrame(user_data)
    # check_csv('history.csv', data)
    # hist_df = pd.read_csv('history.csv')
    return scores
    # , hist_df.head()
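# Example call (illustrative only; assumes network access to the Hub and is not run at import time):
#   sentiment_analysis("roberta", "Being vaccinated is actually awesome :)")
#   -> {"Negative": ..., "Neutral": ..., "Positive": ...}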
model_type = gr.Radio(choices=['distilbert', 'roberta'], label='Select model type', value='roberta')

# Gradio app interface
demo = gr.Interface(
    fn=sentiment_analysis,
    inputs=[model_type, gr.TextArea(placeholder="Write your text or tweet here", label="Analyze your COVID-19 tweets")],
    outputs=["label"],
    title="COVID-19 Vaccine Tweet Analyzer App",
    description="COVID-19 Tweets Analyzer",
    interpretation="default",
    examples=[["roberta", "Being vaccinated is actually awesome :)"]],
)

demo.launch()