EmreYY20 committed on
Commit
5f89cc0
1 Parent(s): 586efa7

add hybrid summarization

Browse files
Files changed (2) hide show
  1. app.py +20 -15
  2. hybrid_summarization.py +6 -0
app.py CHANGED
@@ -1,26 +1,26 @@
1
  import streamlit as st
2
  import re
 
 
 
 
 
 
3
  from rouge import Rouge
4
  from datasets import load_dataset
5
- import PyPDF2
6
  from extractive_summarization import summarize_with_textrank, summarize_with_lsa
7
  from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
8
  from keyword_extraction import extract_keywords
9
  from keyphrase_extraction import extract_sentences_with_obligations
10
- from wordcloud import WordCloud
11
- import matplotlib.pyplot as plt
12
- from PIL import Image
13
- import io
14
- #from blanc import BlancHelp
15
 
16
-
17
- # Load in ToS
18
  dataset = load_dataset("EE21/ToS-Summaries")
19
 
20
  # Extract titles or identifiers for the ToS
21
  tos_titles = [f"Document {i}" for i in range(len(dataset['train']))]
22
-
23
-
24
  # Set page to wide mode
25
  st.set_page_config(layout="wide")
26
 
@@ -41,7 +41,7 @@ def main():
41
 
42
  # Left column: Radio buttons for summarizer choice
43
  with col1:
44
- radio_options = ["Abstractive (LongT5)", "Abstractive (LED)", 'Abstractive (BART Fine-tuned)', "Abstractive (BART-large-CNN)", 'Extractive (TextRank)',
45
  "Extractive (Latent Semantic Analysis)", 'Keyphrase Extraction (RAKE)', 'Keyword Extraction (RAKE)']
46
 
47
  help_text = "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. " \
@@ -77,6 +77,11 @@ def main():
77
  st.warning("Please upload a PDF, enter some text, or select a document to summarize.")
78
  return
79
 
 
 
 
 
 
80
  # Perform extractive summarization
81
  if radio_selection == "Extractive (TextRank)":
82
  summary = summarize_with_textrank(file_content)
@@ -87,22 +92,22 @@ def main():
87
  summary = summarize_with_lsa(file_content)
88
  st.session_state.summary = summary
89
 
90
- # Perform extractive summarization
91
  if radio_selection == "Abstractive (BART Fine-tuned)":
92
  summary = summarize_with_bart_ft(file_content)
93
  st.session_state.summary = summary
94
 
95
- # Perform extractive summarization
96
  if radio_selection == "Abstractive (BART-large-CNN)":
97
  summary = summarize_with_bart_cnn(file_content)
98
  st.session_state.summary = summary
99
 
100
- # Perform extractive summarization
101
  if radio_selection == "Abstractive (LongT5)":
102
  summary = summarize_with_t5(file_content)
103
  st.session_state.summary = summary
104
 
105
- # Perform extractive summarization
106
  if radio_selection == "Abstractive (LED)":
107
  summary = summarize_with_led(file_content)
108
  st.session_state.summary = summary
 
1
  import streamlit as st
2
  import re
3
+ import PyPDF2
4
+ import matplotlib.pyplot as plt
5
+ import io
6
+ from wordcloud import WordCloud
7
+ from PIL import Image
8
+
9
  from rouge import Rouge
10
  from datasets import load_dataset
 
11
  from extractive_summarization import summarize_with_textrank, summarize_with_lsa
12
  from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
13
  from keyword_extraction import extract_keywords
14
  from keyphrase_extraction import extract_sentences_with_obligations
15
+ from hybrid_summarization import summarize_hybrid
 
 
 
 
16
 
17
+ #-------------------------------------------------------------------#
18
+ # Load in ToS-Summaries dataset
19
  dataset = load_dataset("EE21/ToS-Summaries")
20
 
21
  # Extract titles or identifiers for the ToS
22
  tos_titles = [f"Document {i}" for i in range(len(dataset['train']))]
23
+
 
24
  # Set page to wide mode
25
  st.set_page_config(layout="wide")
26
 
 
41
 
42
  # Left column: Radio buttons for summarizer choice
43
  with col1:
44
+ radio_options = ["Hybrid (RAKE + BART Fine-tuned)", "Abstractive (LongT5)", "Abstractive (LED)", 'Abstractive (BART Fine-tuned)', "Abstractive (BART-large-CNN)", 'Extractive (TextRank)',
45
  "Extractive (Latent Semantic Analysis)", 'Keyphrase Extraction (RAKE)', 'Keyword Extraction (RAKE)']
46
 
47
  help_text = "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. " \
 
77
  st.warning("Please upload a PDF, enter some text, or select a document to summarize.")
78
  return
79
 
80
+ # Perform hybrid summarization
81
+ if radio_selection == "Hybrid (RAKE + BART Fine-tuned)":
82
+ summary = summarize_hybrid(file_content)
83
+ st.session_state.summary = summary
84
+
85
  # Perform extractive summarization
86
  if radio_selection == "Extractive (TextRank)":
87
  summary = summarize_with_textrank(file_content)
 
92
  summary = summarize_with_lsa(file_content)
93
  st.session_state.summary = summary
94
 
95
+ # Perform abstractive summarization
96
  if radio_selection == "Abstractive (BART Fine-tuned)":
97
  summary = summarize_with_bart_ft(file_content)
98
  st.session_state.summary = summary
99
 
100
+ # Perform abstractive summarization
101
  if radio_selection == "Abstractive (BART-large-CNN)":
102
  summary = summarize_with_bart_cnn(file_content)
103
  st.session_state.summary = summary
104
 
105
+ # Perform abstractive summarization
106
  if radio_selection == "Abstractive (LongT5)":
107
  summary = summarize_with_t5(file_content)
108
  st.session_state.summary = summary
109
 
110
+ # Perform abstractive summarization
111
  if radio_selection == "Abstractive (LED)":
112
  summary = summarize_with_led(file_content)
113
  st.session_state.summary = summary
hybrid_summarization.py CHANGED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from keyphrase_extraction import extract_sentences_with_obligations
2
+ from abstractive_summarization import summarize_with_bart_ft
3
+
4
+ def summarize_hybrid(text):
5
+ extract = extract_sentences_with_obligations(text)
6
+ return summarize_with_bart_ft(extract)