Spaces:
Runtime error
Runtime error
File size: 2,531 Bytes
c69842f b3fb325 4abb8db b3fb325 4abb8db b3fb325 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import pandas as pd
import random
# Toxicity label names. They are used as the column headers for the
# per-label scores shown in the result tables.
classifiers = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]
def reset_scores():
    """Rebind the global ``scores_df`` to an empty scores table.

    The fresh frame has a ``Comment`` column followed by one column per
    entry of ``classifiers``; any previously collected rows are discarded.
    """
    global scores_df
    column_names = ['Comment', *classifiers]
    scores_df = pd.DataFrame(columns=column_names)
# Fine-tuned checkpoint directory for each explicitly supported base model.
_MODEL_DIRS = {
    "bert-base-cased": "./bert/_bert_model",
    "distilbert-base-cased": "./distilbert/_distilbert_model",
}
# Every other model name falls back to the RoBERTa checkpoint.
_DEFAULT_MODEL_DIR = "./roberta/_roberta_model"

# (model, tokenizer) pairs that have already been loaded, keyed by base
# model name, so repeated scoring calls do not reload weights from disk.
# NOTE(review): this is plain module state, so it only helps while this
# module's globals live. If Streamlit re-executes the script on each
# interaction, consider its resource cache to keep models across reruns.
_loaded_models = {}


def _load_model_and_tokenizer(model_base):
    """Return the (model, tokenizer) pair for *model_base*, loading it once."""
    if model_base not in _loaded_models:
        model_dir = _MODEL_DIRS.get(model_base, _DEFAULT_MODEL_DIR)
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        # The tokenizer is resolved from the base model name, not from the
        # local checkpoint directory.
        tokenizer = AutoTokenizer.from_pretrained(model_base)
        _loaded_models[model_base] = (model, tokenizer)
    return _loaded_models[model_base]


def get_score(model_base: str, text: str) -> "torch.Tensor":
    """Score *text* with the fine-tuned classifier for *model_base*.

    Args:
        model_base: Base model name. ``"bert-base-cased"`` and
            ``"distilbert-base-cased"`` select their own checkpoints; any
            other value uses the RoBERTa checkpoint.
        text: The comment to classify. It is truncated to at most 512
            tokens before being fed to the model.

    Returns:
        The element-wise sigmoid of the model's logits as a torch tensor.
        Callers in this file read index 0 as the scores for the single
        input text.
    """
    model, tokenizer = _load_model_and_tokenizer(model_base)
    inputs = tokenizer.encode_plus(
        text, max_length=512, truncation=True, padding=True,
        return_tensors='pt')
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.sigmoid(outputs.logits)
    return predictions
# --- Single-comment classification ------------------------------------------
# Ask the user for a comment and show its per-label toxicity scores.
st.title("Toxic Comment Classifier")
text_input = st.text_input("Enter text for toxicity classification",
                           "I hope you die")
submit_btn = st.button("Submit")

# Drop-down menu for model selection; roberta-base is listed first.
model_base = st.selectbox("Select a pretrained model",
                          ["roberta-base", "bert-base-cased", "distilbert-base-cased"])

if submit_btn and text_input:
    result = get_score(model_base, text_input)
    df = pd.DataFrame([result[0].tolist()], columns=classifiers)
    df = df.round(2)  # Round the values to 2 decimal places
    # Format the values as percentages. A per-column Series.map is used
    # because DataFrame.applymap is deprecated in recent pandas releases.
    df = df.apply(lambda column: column.map('{:.0%}'.format))
    st.table(df)

# --- Scores for random test-set comments ------------------------------------
# Read the test dataset
test_df = pd.read_csv(
    "./jigsaw-toxic-comment-classification-challenge/test.csv")

# Select a few random comments from the test dataset. The sample size is
# clamped so a dataset with fewer rows does not make sample() raise.
NUM_RANDOM_COMMENTS = 3
sample_df = test_df.sample(n=min(NUM_RANDOM_COMMENTS, len(test_df)))

# Start from an empty global scores table
reset_scores()

# Calculate the scores for each comment and append them as a new row
for comment in sample_df['comment_text']:
    scores = get_score(model_base, comment)[0].tolist()
    scores_df.loc[len(scores_df)] = [comment, *scores]

# Round the values to 2 decimal places
scores_df = scores_df.round(2)
st.subheader("Toxicity Scores for Random Comments")
st.table(scores_df)

# Button to reset the scores. The table above has already been rendered by
# the time this runs, so the reset only clears the global frame.
if st.button("Refresh Random Tweets"):
    reset_scores()
    st.success("New tweets have been loaded!")
|