File size: 3,503 Bytes
320009b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca0169a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320009b
 
 
 
 
 
 
 
 
 
 
 
ca0169a
 
320009b
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import torch
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt

# Use a pipeline as a high-level helper
from transformers import pipeline
# model_path = ("../Models/models--distilbert--distilbert-base-uncased-finetuned-sst-2-english"
#               "/snapshots/714eb0fa89d2f80546fda750413ed43d93601a13")

# Build the classifier once, at import time, so every call to
# sentiment_analyzer reuses the same loaded pipeline. The model is referenced
# by name here rather than by a local snapshot path.
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

# analyzer = pipeline("text-classification",
#                 model=model_path)



# print(analyzer(["This production is good", "This product was quite expensive"]))

def sentiment_analyzer(review):
    """Classify a single review and return its predicted label.

    The review is handed to the module-level ``analyzer`` pipeline; the
    ``'label'`` entry of the first prediction in the result is returned.
    """
    predictions = analyzer(review)
    first_prediction = predictions[0]
    return first_prediction['label']

def sentiment_bar_chart(df):
    """Draw the distribution of sentiment labels and return the figure.

    Despite the function name, the chart is a pie chart: one slice per
    distinct value in ``df['Sentiment']``, annotated with its percentage.

    Args:
        df: DataFrame containing a 'Sentiment' column.

    Returns:
        The matplotlib figure holding the chart.
    """
    sentiment_counts = df['Sentiment'].value_counts()

    fig, ax = plt.subplots()

    if sentiment_counts.empty:
        # There is nothing to slice; pandas is expected to refuse plotting an
        # empty Series, so show a placeholder message instead of failing.
        ax.text(0.5, 0.5, 'No reviews to display', ha='center', va='center')
        ax.set_axis_off()
        ax.set_title('Review Sentiment Counts')
        return fig

    # Pick each slice's colour from its label rather than from its position:
    # value_counts() orders by frequency, so a positional ['green', 'red']
    # would paint the negative slice green whenever negatives are the majority.
    # Labels are presumably POSITIVE/NEGATIVE (SST-2 model); anything else is gray.
    palette = {'POSITIVE': 'green', 'NEGATIVE': 'red'}
    slice_colors = [palette.get(str(label).upper(), 'gray')
                    for label in sentiment_counts.index]

    # Pie charts take `colors` (plural); `color` is not a pie keyword.
    sentiment_counts.plot(kind='pie', ax=ax, autopct='%1.1f%%', colors=slice_colors)
    ax.set_title('Review Sentiment Counts')
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')

    # Return the figure object
    return fig


def read_reviews_and_analyze_sentiment(file_object):
    """Load reviews from an uploaded file, classify each one, and chart the result.

    Supported formats, chosen by the (case-insensitive) extension of
    ``file_object.name``:

    * ``.xlsx`` - must contain a 'Reviews' column; rows with an empty review
      are dropped.
    * ``.docx`` - every non-blank paragraph is one review.
    * ``.pdf``  - every non-blank line of extracted text is one review.

    Args:
        file_object: Uploaded file; must expose a ``name`` attribute and be
            accepted by the reader for its format.

    Returns:
        A tuple ``(df, figure)``: the DataFrame with 'Reviews' and 'Sentiment'
        columns, and the figure produced by ``sentiment_bar_chart``.

    Raises:
        ValueError: If the format is unsupported, the Excel file has no
            'Reviews' column, or the file contains no reviews.
    """
    # NOTE(review): `Document` and `PdfReader` are not imported in the visible
    # file, so the .docx/.pdf branches raise NameError as written. Presumably
    # they come from python-docx and pypdf/PyPDF2 - confirm and add the imports.
    filename = file_object.name.lower()

    if filename.endswith('.xlsx'):
        # Load the Excel file into a DataFrame
        df = pd.read_excel(file_object)

        # Check that the 'Reviews' column is in the DataFrame
        if 'Reviews' not in df.columns:
            raise ValueError("Excel file must contain a 'Reviews' column.")

        # Blank cells load as NaN, which is not text the classifier can take.
        df = df.dropna(subset=['Reviews'])
    elif filename.endswith('.docx'):
        # One review per non-blank paragraph
        doc = Document(file_object)
        reviews = [para.text for para in doc.paragraphs if para.text.strip()]
        df = pd.DataFrame({'Reviews': reviews})
    elif filename.endswith('.pdf'):
        reader = PdfReader(file_object)
        # extract_text() may yield nothing for a page; treat that as empty text.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        # Join pages with a newline so the last line of one page does not fuse
        # with the first line of the next, then keep only non-blank lines
        # (assuming reviews are newline separated).
        full_text = "\n".join(page_texts)
        reviews = [line for line in full_text.split('\n') if line.strip()]
        df = pd.DataFrame({'Reviews': reviews})
    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")

    if df.empty:
        raise ValueError("No reviews found in the uploaded file.")

    # Classify every review; cast to str so non-text cells (e.g. numbers) do
    # not break the classifier, while leaving the 'Reviews' column untouched.
    df['Sentiment'] = df['Reviews'].astype(str).apply(sentiment_analyzer)

    chart_object = sentiment_bar_chart(df)
    return df, chart_object

# result = read_reviews_and_analyze_sentiment("../Files/Prod-review.xlsx")
# print(result)
# Example usage:
# df = read_reviews_and_analyze_sentiment('path_to_your_excel_file.xlsx')
# print(df)


# Gradio UI: one uploaded file in; a table of per-review sentiments and a chart out.
# The outputs list needs exactly one component per value returned by
# read_reviews_and_analyze_sentiment (a DataFrame and a figure). Declaring a
# third component with no matching return value makes Gradio reject the result.
demo = gr.Interface(fn=read_reviews_and_analyze_sentiment,
                    # Extensions need a leading dot; bare words are read as file categories.
                    inputs=[gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file")],
                    outputs=[gr.Dataframe(label="Sentiments"), gr.Plot(label="Sentiment Analysis")],
                    title="@GenAILearniverse Project 3: Sentiment Analyzer",
                    description="THIS APPLICATION WILL BE USED TO ANALYZE THE SENTIMENT BASED ON FILE UPLAODED.")
demo.launch()






# Example usage:
# Assuming you have a dataframe `df` with appropriate data
# fig = sentiment_bar_chart(df)
# fig.show()  # This line is just to visualize the plot in a local environment