peterciank committed on
Commit
c2ad08f
·
verified ·
1 Parent(s): ab5e33b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import nltk
4
+ from transformers import pipeline
5
+ from rake_nltk import Rake
6
+ from nltk.corpus import stopwords
7
+ from fuzzywuzzy import fuzz
8
+
9
# Page title.  Bug fix: the original called `s.title(...)`, but the Streamlit
# module is imported as `st` — `s` is undefined and raised NameError at startup.
st.title("Exploring Torch, Transformers, Rake, and Others analyzing Text")

# Define the options for the dropdown menu
options = ['Option 1', 'Option 2']

# Create a dropdown menu to select options
selected_option = st.selectbox("Select an option", options)

# Define URLs for different options (raw job-description text files)
url_option1 = "https://raw.githubusercontent.com/peteciank/me/main/jd_sm.txt"
url_option2 = "https://raw.githubusercontent.com/peteciank/me/main/jd_controller.txt"
21
# Function to fetch text content based on selected option
def fetch_text_content(selected_option):
    """Download the text file behind the chosen dropdown option.

    Returns the HTTP response body as a string, or "" when the option
    is not one of the known choices.
    """
    # Map each option to its source URL; unknown options fall through to "".
    urls = {'Option 1': url_option1, 'Option 2': url_option2}
    url = urls.get(selected_option)
    if url is None:
        return ""
    # Fix: the original requests.get had no timeout, so a stalled server
    # would hang the Streamlit app indefinitely.
    return requests.get(url, timeout=10).text
# Retrieve the selected job description and show it in an editable text area.
text_content = fetch_text_content(selected_option)
jd = st.text_area("Text File Content", text_content)

# Make sure the NLTK tokenizer and stopword corpora are present locally.
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)

# Hugging Face pipelines: one for sentiment scoring, one for summarization.
pipe_sent = pipeline('sentiment-analysis')
pipe_summ = pipeline("summarization", model="facebook/bart-large-cnn")
46
# Function to extract keywords and remove duplicates
def extract_keywords(text):
    """Extract up to 10 RAKE keyword phrases from *text*.

    Phrases that are fuzzy-similar (fuzz.ratio > 70) are merged into the
    longest variant, and phrases that are pure stopwords are dropped.
    Returns a list of (score, phrase) tuples, highest score first.
    """
    rake = Rake()
    rake.extract_keywords_from_text(text)
    phrases_with_scores = rake.get_ranked_phrases_with_scores()

    # Drop any phrase that is nothing but a stopword.
    stop_words = set(stopwords.words('english'))
    keywords = [(score, phrase) for score, phrase in phrases_with_scores
                if phrase.lower() not in stop_words]

    # Sort keywords by score in descending order.
    keywords.sort(key=lambda item: item[0], reverse=True)

    # Merge near-duplicate phrases.  Bug fix: the original appended the
    # merged phrase as a NEW entry while the similar phrase it merged with
    # was already in the result list, so near-duplicates still appeared
    # twice.  Here the already-kept similar entry is replaced in place by
    # the longer merged variant instead.
    unique_keywords = []   # deduplicated (score, phrase) pairs
    seen_phrases = set()
    for score, phrase in keywords:
        if phrase in seen_phrases:
            continue
        similar = [seen for seen in seen_phrases
                   if fuzz.ratio(phrase, seen) > 70]
        if similar:
            merged = max([phrase] + similar, key=len)
            # Replace the first already-kept similar entry, keeping its
            # (higher) score; only append if none is found.
            for i, (kept_score, kept_phrase) in enumerate(unique_keywords):
                if kept_phrase in similar:
                    unique_keywords[i] = (kept_score, merged)
                    break
            else:
                unique_keywords.append((score, merged))
        else:
            unique_keywords.append((score, phrase))
        seen_phrases.add(phrase)
    return unique_keywords[:10]  # Return only the first 10 keywords
76
+
77
# Let the user edit the text before analysis; defaults to the fetched JD.
text = st.text_area('Enter the text to analyze', jd)

if text:
    # --- Sentiment analysis ---
    st.write("Sentiment Analysis")
    out_sentiment = pipe_sent(text)
    result = out_sentiment[0]
    emoji = '😊' if result['label'] == 'POSITIVE' else '😞'
    st.write(
        f"Sentiment Score: {result['score']}, "
        f"Sentiment Label: {result['label'].capitalize()} {emoji}"
    )

    # --- Summarization ---
    st.write("Summarization")
    out_summ = pipe_summ(text)
    st.write(out_summ[0]['summary_text'])

    # --- Keyword extraction ---
    st.write("Keywords")
    st.write([phrase for _score, phrase in extract_keywords(text)])