import streamlit as st
import requests
import nltk
from transformers import pipeline
from rake_nltk import Rake
from nltk.corpus import stopwords
from fuzzywuzzy import fuzz
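
# Note: fuzzywuzzy is unmaintained and continues as "thefuzz"; installing
# python-Levenshtein (or thefuzz[speedup]) replaces the slow pure-Python matcher.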

st.title("Exploring Torch, Transformers, Rake, and Others analyzing Text")

# Preset options: each selects a remote .txt file whose contents will be analyzed
BASE_URL = "https://raw.githubusercontent.com/peteciank/public_files/main/Transformers"
option_urls = {
    'Appreciation Letter': f"{BASE_URL}/Appreciation_Letter.txt",
    'Regret Letter': f"{BASE_URL}/Regret_Letter.txt",
    'Kindness Tale': f"{BASE_URL}/Kindness_Tale.txt",
    'Lost Melody Tale': f"{BASE_URL}/Lost_Melody_Tale.txt",
    'Twitter Example 1': f"{BASE_URL}/Twitter_Example_1.txt",
    'Twitter Example 2': f"{BASE_URL}/Twitter_Example_2.txt",
}

# Create a dropdown menu to select one of the presets
selected_option = st.selectbox("Select a preset option", list(option_urls))

# Fetch the text content for the selected option (empty string if unknown)
def fetch_text_content(selected_option):
    url = option_urls.get(selected_option)
    return requests.get(url).text if url else ""

# Fetch text content based on selected option
text_content = fetch_text_content(selected_option)

# Display text content in a text area
jd = st.text_area("Text File Content", text_content)


# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
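# Recent NLTK releases split the punkt data; if tokenization raises a
# LookupError, nltk.download('punkt_tab') may be needed as well.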

# Initialize pipeline for sentiment analysis
pipe_sent = pipeline('sentiment-analysis')
# Initialize pipeline for summarization
pipe_summ = pipeline("summarization", model="facebook/bart-large-cnn")
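
# Rebuilding both pipelines on every Streamlit rerun is slow; a cached loader
# is the usual fix. A minimal sketch (assumes Streamlit >= 1.18, which provides
# st.cache_resource):
#
#     @st.cache_resource
#     def load_pipelines():
#         sent = pipeline('sentiment-analysis')
#         summ = pipeline("summarization", model="facebook/bart-large-cnn")
#         return sent, summ
#
#     pipe_sent, pipe_summ = load_pipelines()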

# Function to extract keywords and remove duplicates
def extract_keywords(text):
    r = Rake()
    r.extract_keywords_from_text(text)
    # Get all phrases scored
    phrases_with_scores = r.get_ranked_phrases_with_scores()
    # Filter out stopwords
    stop_words = set(stopwords.words('english'))
    keywords = []
    for score, phrase in phrases_with_scores:
        # Check if the phrase is not a stopword and add to the list
        if phrase.lower() not in stop_words:
            keywords.append((score, phrase))
    # Sort keywords by score in descending order
    keywords.sort(key=lambda x: x[0], reverse=True)
    # Drop near-duplicates: keep the first (highest-scoring) occurrence of each
    # phrase and skip later phrases that are fuzzy-similar to one already kept
    unique_keywords = []
    seen_phrases = set()
    for score, phrase in keywords:
        if any(fuzz.ratio(phrase, seen_phrase) > 70 for seen_phrase in seen_phrases):
            continue
        unique_keywords.append((score, phrase))
        seen_phrases.add(phrase)
    return unique_keywords[:10]  # Return only the top 10 keywords
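
# Example: in a text where "natural language processing" occurs once and its
# three words occur nowhere else, RAKE scores the phrase as
# sum(deg(word) / freq(word)) = 3 + 3 + 3 = 9.0, so it would rank near the top.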

text = st.text_area('Enter the text to analyze', jd)

if text:

    with st.expander("Sentiment Analysis", expanded=True):
        # Sentiment analysis
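        # The pipeline's default model (DistilBERT fine-tuned on SST-2) is
        # binary: it returns a POSITIVE or NEGATIVE label with a confidence score.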
        out_sentiment = pipe_sent(text)
        # Display sentiment analysis result
        sentiment_score = out_sentiment[0]['score']
        sentiment_label = out_sentiment[0]['label']
        sentiment_emoji = '😊' if sentiment_label == 'POSITIVE' else '😞'
        sentiment_text = f"Sentiment Score: {sentiment_score}, Sentiment Label: {sentiment_label.capitalize()} {sentiment_emoji}"
        st.write(sentiment_text)
        st.write("βœ… Completed")
        
    with st.expander("Summarization", expanded=True):        
        # Summarization
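        # Note: facebook/bart-large-cnn accepts at most 1024 input tokens;
        # longer texts may need truncation or chunked summarization.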
        out_summ = pipe_summ(text)
        summarized_text = out_summ[0]['summary_text']
        st.write(summarized_text)
        st.write("βœ… Completed")

    with st.expander("Keywords Extraction", expanded=True):
        # Keyword extraction
        keywords = extract_keywords(text)
        keyword_list = [keyword[1] for keyword in keywords]
        st.write(keyword_list)
        st.write("βœ… Completed")