resto_reviews / app.py
hiba9's picture
Upload app.py
369a2fc verified
raw
history blame contribute delete
No virus
3.17 kB
import torch
import streamlit as st
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import re
import string
def tokenize_sentences(sentence):
    """Encode a single sentence with the module-level ``tokenizer``.

    The sentence is encoded with special tokens, padded/truncated to a
    maximum length of 128, and returned as PyTorch tensors.

    Args:
        sentence: The text to encode.

    Returns:
        A ``(input_ids, attention_mask)`` pair taken from the encoding.
    """
    encoding_options = {
        "add_special_tokens": True,
        "max_length": 128,
        "padding": "max_length",
        "truncation": True,
        "return_attention_mask": True,
        "return_tensors": "pt",
    }
    encoding = tokenizer.encode_plus(sentence, **encoding_options)

    # Concatenating a one-element list along dim 0 yields a tensor with the
    # same contents as the tokenizer output.
    input_ids = torch.cat([encoding["input_ids"]], dim=0)
    attention_mask = torch.cat([encoding["attention_mask"]], dim=0)
    return input_ids, attention_mask
def preprocess_query(query):
    """Normalize a user query before tokenization.

    The value is converted to ``str``, lower-cased, stripped of leading and
    trailing whitespace, and then has every ASCII punctuation character
    (``string.punctuation``) removed.

    Note that stripping happens *before* punctuation removal, so whitespace
    that sat next to trailing/leading punctuation is kept.

    Args:
        query: Any object; it is passed through ``str()`` first.

    Returns:
        The normalized string.
    """
    punctuation_remover = str.maketrans("", "", string.punctuation)
    normalized = str(query).lower().strip()
    return normalized.translate(punctuation_remover)
def predict_aspects(sentence, threshold):
    """Score a sentence against every aspect label and keep the confident ones.

    The sentence is tokenized with ``tokenize_sentences``, run through
    ``aspects_model`` with gradients disabled, and the logits are passed
    through a sigmoid so each label gets its own independent score.

    Args:
        sentence: The (already preprocessed) text to classify.
        threshold: Scores strictly below this value are dropped.

    Returns:
        A dict mapping each retained label from ``LABEL_COLUMNS_ASPECTS`` to
        its score expressed as a percentage rounded to two decimals, in label
        order.
    """
    ids, mask = tokenize_sentences(sentence)
    with torch.no_grad():
        logits = aspects_model(ids, attention_mask=mask).logits

    # squeeze() drops size-1 dimensions so tolist() gives one score per label.
    scores = torch.sigmoid(logits).squeeze().tolist()

    return {
        label: round(float(score) * 100, 2)
        for label, score in zip(LABEL_COLUMNS_ASPECTS, scores)
        if not score < threshold
    }
# Load tokenizer and model
BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION = 'roberta-large'
LABEL_COLUMNS_ASPECTS = ['FOOD-CUISINE', 'FOOD-DEALS', 'FOOD-DIET_OPTION', 'FOOD-EXPERIENCE', 'FOOD-FLAVOR', 'FOOD-GENERAL', 'FOOD-INGREDIENT', 'FOOD-KITCHEN', 'FOOD-MEAL', 'FOOD-MENU', 'FOOD-PORTION', 'FOOD-PRESENTATION', 'FOOD-PRICE', 'FOOD-QUALITY', 'FOOD-RECOMMENDATION', 'FOOD-TASTE', 'GENERAL-GENERAL', 'RESTAURANT-ATMOSPHERE', 'RESTAURANT-BUILDING', 'RESTAURANT-DECORATION', 'RESTAURANT-EXPERIENCE', 'RESTAURANT-FEATURES', 'RESTAURANT-GENERAL', 'RESTAURANT-HYGIENE', 'RESTAURANT-KITCHEN', 'RESTAURANT-LOCATION', 'RESTAURANT-OPTIONS', 'RESTAURANT-RECOMMENDATION', 'RESTAURANT-SEATING_PLAN', 'RESTAURANT-VIEW', 'SERVICE-BEHAVIOUR', 'SERVICE-EXPERIENCE', 'SERVICE-GENERAL', 'SERVICE-WAIT_TIME']


@st.cache_resource
def _load_aspects_tokenizer_and_model():
    """Build the tokenizer and the fine-tuned aspect classifier once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the base model and the checkpoint would be
    reloaded each time the text box or slider changes; ``st.cache_resource``
    keeps a single shared instance alive across reruns.

    Returns:
        A ``(tokenizer, model)`` pair; the model is in evaluation mode with
        the local checkpoint weights applied on CPU.
    """
    loaded_tokenizer = RobertaTokenizer.from_pretrained(
        BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION, do_lower_case=True
    )
    model = RobertaForSequenceClassification.from_pretrained(
        BERT_MODEL_NAME_FOR_ASPECTS_CLASSIFICATION,
        num_labels=len(LABEL_COLUMNS_ASPECTS),
    )
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    state_dict = torch.load(
        './Aspects_Extraction_Model_updated.pth',
        map_location=torch.device('cpu'),
    )
    # NOTE(review): strict=False silently skips keys that do not match the
    # model; confirm the checkpoint really covers the classification head.
    model.load_state_dict(state_dict, strict=False)
    model.eval()
    return loaded_tokenizer, model


tokenizer, aspects_model = _load_aspects_tokenizer_and_model()
# Streamlit App
# Page layout: a title, a free-text input and a probability threshold slider.
st.title("Implicit and Explicit Aspect Extraction")
sentence = st.text_input("Enter a sentence:")
threshold = st.slider("Threshold", min_value=0.0, max_value=1.0, step=0.01, value=0.5)

# Only run the model once the user has typed something.
if sentence:
    processed_sentence = preprocess_query(sentence)
    results = predict_aspects(processed_sentence, threshold)

    if not results:
        st.write("No aspects above the threshold.")
    else:
        st.write("Predicted Aspects:")
        # Each label has the form "CATEGORY-ASPECT"; split it into two columns
        # and append the formatted percentage as the third.
        table_data = [["Category", "Aspect", "Probability"]] + [
            label.split("-") + [f"{score}%"] for label, score in results.items()
        ]
        st.table(table_data)