Faisalaldwaish1's picture
Update app.py
a773866 verified
import pandas as pd
from transformers import pipeline
import gradio as gr
import seaborn as sns
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
# Read the restaurant reviews CSV into a module-level DataFrame used by every tab
reviews_df = pd.read_csv(filepath_or_buffer='Restaurant_reviews.csv')
# Build the Hugging Face sentiment-analysis pipeline once, at import time
sentiment_model = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
# Classify a user's review by fuzzy-matching it against the dataset and running sentiment analysis

# A dataset review only counts as a match when its fuzzy score is strictly above this value
_MATCH_THRESHOLD = 80


def _rating_verdict(rating):
    """Turn a dataset rating into the rating-based classification line.

    Ratings of 4 or more are reported as positive, anything lower as negative.
    A rating that cannot be read as a number (missing value, free text) is
    reported as unavailable instead of raising.
    """
    try:
        numeric_rating = float(rating)
    except (TypeError, ValueError):
        numeric_rating = float("nan")
    if pd.isna(numeric_rating):
        return f"Rating unavailable for the matched review: {rating}"
    if numeric_rating >= 4:
        return f"Positive review based on rating: {rating}"
    return f"Negative review based on rating: {rating}"


def classify_review(user_review):
    """Classify a review using the closest dataset entry and the sentiment model.

    The review is compared case-insensitively against every non-missing
    'Review' value in ``reviews_df`` with ``fuzz.token_sort_ratio``. When the
    best score exceeds the match threshold, the result combines the matched
    row's rating verdict, the sentiment model's label and confidence, the
    matching score, and the matched row itself.

    Args:
        user_review: Review text entered by the user.

    Returns:
        A human-readable result string. Blank or missing input yields a prompt
        to enter a review, a weak best match yields a "not found" message, and
        any unexpected failure yields an "An error occurred" message.
    """
    try:
        if user_review is None or not user_review.strip():
            return "Please enter a valid review."

        # Lower-case the query once instead of once per dataset row
        query = user_review.lower()
        best_match = None
        best_score = 0
        for _, row in reviews_df.iterrows():
            if pd.isna(row['Review']):
                continue
            # Use fuzzy matching to find the most similar review in the dataset
            score = fuzz.token_sort_ratio(query, str(row['Review']).lower())
            if score > best_score:
                best_score = score
                best_match = row

        if best_match is None or best_score <= _MATCH_THRESHOLD:
            return "Review not found in the dataset."

        rating_based_classification = _rating_verdict(best_match['Rating'])
        sentiment_result = sentiment_model(user_review)[0]
        sentiment = sentiment_result['label']
        confidence = sentiment_result['score']
        sentiment_based_classification = f"Model prediction: {sentiment} with confidence: {confidence:.2f}"
        return f"{rating_based_classification}\n{sentiment_based_classification}\nMatching Score: {best_score}%\nBest Match\n{best_match}"
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app
        return f"An error occurred: {str(e)}"
# Plot how many reviews in the dataset received each rating
def plot_rating_distribution():
    """Build a count plot of the 'Rating' column for ratings 1 through 5.

    Returns:
        The matplotlib Figure containing the plot.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(x='Rating', data=reviews_df, order=[1, 2, 3, 4, 5], ax=ax)
    ax.set_title('Distribution of Ratings')
    ax.set_xlabel('Rating')
    ax.set_ylabel('Count')
    fig.tight_layout()
    # pyplot keeps every figure it creates alive until it is closed; close this
    # one so repeated calls do not accumulate figures. The returned Figure
    # object can still be rendered by the caller.
    plt.close(fig)
    return fig
# Function to allow users to preview the dataset (first 10 rows)
def preview_dataset():
    """Return the first 10 rows of the reviews dataset for display."""
    # 10 rows, matching what the preview tab's description tells the user
    return reviews_df.head(10)
# Create the Gradio interface for classifying reviews (defined once; a second,
# identical definition would only rebuild and discard the first)
review_interface = gr.Interface(
    fn=classify_review,
    inputs=gr.Textbox(lines=2, placeholder="Enter your review here", label="Reviews"),
    outputs="text",
    title="Review Classifier Based on Rating and Hugging Face Model",
    description="Enter a restaurant review. The system will classify it based on the dataset rating and use a sentiment analysis model."
)
# Gradio tab that renders the rating distribution chart; it takes no user input
plot_interface = gr.Interface(
    title="Rating Distribution",
    description="Shows the distribution of ratings in the dataset.",
    fn=plot_rating_distribution,
    inputs=[],
    outputs="plot",
)
# Gradio tab that shows the rows returned by preview_dataset as a table; it takes no user input
preview_interface = gr.Interface(
    title="Preview Restaurant Reviews Dataset",
    description="Displays the first 10 rows of the dataset for preview.",
    fn=preview_dataset,
    inputs=[],
    outputs="dataframe",
)
# Combine the interfaces (Review Classifier, Rating Distribution, Dataset Preview) into tabs;
# exactly one tab name per interface, in the same order
tabbed_interface = gr.TabbedInterface(
    [review_interface, plot_interface, preview_interface],
    ["Review Classifier", "Rating Distribution", "Dataset Preview"],
)
# Launch the Gradio interface
tabbed_interface.launch()