import streamlit as st
import requests
import nltk
from transformers import pipeline
from rake_nltk import Rake
from nltk.corpus import stopwords
from fuzzywuzzy import fuzz
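# Assumed dependencies: streamlit, requests, nltk, transformers (with a torch
# backend), rake-nltk, and fuzzywuzzy (python-Levenshtein is optional but
# silences fuzzywuzzy's slow-match warning).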

st.title("Exploring Torch, Transformers, Rake, and Others analyzing Text")

# Options for the dropdown menu; each one selects a remote .txt file to analyze
options = ['None', 'Appreciation Letter', 'Regret Letter', 'Kindness Tale', 'Lost Melody Tale', 'Twitter Example 1', 'Twitter Example 2']

# Create a dropdown menu to select options
selected_option = st.selectbox("Select a preset option", options)

# Map each option to the URL of its remote text file
BASE_URL = "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers"
option_urls = {
    'Appreciation Letter': f"{BASE_URL}/Appreciation_Letter.txt",
    'Regret Letter': f"{BASE_URL}/Regret_Letter.txt",
    'Kindness Tale': f"{BASE_URL}/Kindness_Tale.txt",
    'Lost Melody Tale': f"{BASE_URL}/Lost_Melody_Tale.txt",
    'Twitter Example 1': f"{BASE_URL}/Twitter_Example_1.txt",
    'Twitter Example 2': f"{BASE_URL}/Twitter_Example_2.txt",
}

# Fetch the text content for the selected option ('None' returns an empty string)
def fetch_text_content(selected_option):
    url = option_urls.get(selected_option)
    if url is None:
        return ""
    return requests.get(url, timeout=10).text

# Fetch the preset text for the selected option
preset_text = fetch_text_content(selected_option)


# Download NLTK resources (rake-nltk relies on NLTK's tokenizer and stopword list)
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

# Load the pipelines once; st.cache_resource keeps them cached across Streamlit reruns
@st.cache_resource
def load_pipelines():
    sentiment = pipeline('sentiment-analysis')  # default DistilBERT SST-2 model
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return sentiment, summarizer

pipe_sent, pipe_summ = load_pipelines()

# Function to extract keywords and remove duplicates
def extract_keywords(text):
    r = Rake()
    r.extract_keywords_from_text(text)
    # Get all phrases scored
    phrases_with_scores = r.get_ranked_phrases_with_scores()
    # Filter out stopwords
    stop_words = set(stopwords.words('english'))
    keywords = []
    for score, phrase in phrases_with_scores:
        # Check if the phrase is not a stopword and add to the list
        if phrase.lower() not in stop_words:
            keywords.append((score, phrase))
    # Sort keywords by score in descending order
    keywords.sort(key=lambda x: x[0], reverse=True)
    # Remove near-duplicates: keep the first (highest-scored) variant of each
    # group of similar phrases, since the list is already sorted by score
    unique_keywords = []
    seen_phrases = set()
    for score, phrase in keywords:
        # Skip phrases that closely match one we have already kept
        if any(fuzz.ratio(phrase, seen_phrase) > 70 for seen_phrase in seen_phrases):
            continue
        unique_keywords.append((score, phrase))
        seen_phrases.add(phrase)
    return unique_keywords[:10]  # return only the top 10 keywords
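# Example (hypothetical scores; RAKE scores depend on word co-occurrence):
#   extract_keywords("Streamlit makes building data apps simple and sharing data apps easy.")
#   might return [(9.0, 'building data apps simple'), ...]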

text = st.text_area('Enter the text to analyze', preset_text)

if st.button("Start Analysis"):
    with st.spinner("Analyzing Sentiment"):
        with st.expander("Sentiment Analysis - βœ… Completed", expanded=False):
            # Sentiment analysis
            out_sentiment = pipe_sent(text)
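            # pipe_sent returns a list of dicts, e.g. [{'label': 'POSITIVE', 'score': 0.998}]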
            # Display sentiment analysis result
            sentiment_score = out_sentiment[0]['score']
            sentiment_label = out_sentiment[0]['label']
            sentiment_emoji = '😊' if sentiment_label == 'POSITIVE' else '😞'
            sentiment_text = f"Sentiment Score: {sentiment_score}, Sentiment Label: {sentiment_label.capitalize()} {sentiment_emoji}"
            st.write(sentiment_text)

    with st.spinner("Summarizing - This may take a while"):
        with st.expander("Summarization - βœ… Completed", expanded=False):        
            # Summarization (facebook/bart-large-cnn accepts roughly 1024 input
            # tokens, so longer inputs are truncated rather than raising an error)
            out_summ = pipe_summ(text, truncation=True)
            summarized_text = out_summ[0]['summary_text']
            st.write(summarized_text)

    with st.spinner("Extracting Keywords"):
        with st.expander("Keywords Extraction - βœ… Completed", expanded=False):
            # Keyword extraction
            keywords = extract_keywords(text)
            keyword_list = [keyword[1] for keyword in keywords]
            st.write(keyword_list)
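
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py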