Spaces:
Runtime error
Runtime error
EmreYY20
committed on
Commit
•
5f89cc0
1
Parent(s):
586efa7
add hybrid summarization
Browse files
- app.py +20 -15
- hybrid_summarization.py +6 -0
app.py
CHANGED
@@ -1,26 +1,26 @@
|
|
1 |
import streamlit as st
|
2 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from rouge import Rouge
|
4 |
from datasets import load_dataset
|
5 |
-
import PyPDF2
|
6 |
from extractive_summarization import summarize_with_textrank, summarize_with_lsa
|
7 |
from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
|
8 |
from keyword_extraction import extract_keywords
|
9 |
from keyphrase_extraction import extract_sentences_with_obligations
|
10 |
-
from
|
11 |
-
import matplotlib.pyplot as plt
|
12 |
-
from PIL import Image
|
13 |
-
import io
|
14 |
-
#from blanc import BlancHelp
|
15 |
|
16 |
-
|
17 |
-
# Load in ToS
|
18 |
dataset = load_dataset("EE21/ToS-Summaries")
|
19 |
|
20 |
# Extract titles or identifiers for the ToS
|
21 |
tos_titles = [f"Document {i}" for i in range(len(dataset['train']))]
|
22 |
-
|
23 |
-
|
24 |
# Set page to wide mode
|
25 |
st.set_page_config(layout="wide")
|
26 |
|
@@ -41,7 +41,7 @@ def main():
|
|
41 |
|
42 |
# Left column: Radio buttons for summarizer choice
|
43 |
with col1:
|
44 |
-
radio_options = ["Abstractive (LongT5)", "Abstractive (LED)", 'Abstractive (BART Fine-tuned)', "Abstractive (BART-large-CNN)", 'Extractive (TextRank)',
|
45 |
"Extractive (Latent Semantic Analysis)", 'Keyphrase Extraction (RAKE)', 'Keyword Extraction (RAKE)']
|
46 |
|
47 |
help_text = "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. " \
|
@@ -77,6 +77,11 @@ def main():
|
|
77 |
st.warning("Please upload a PDF, enter some text, or select a document to summarize.")
|
78 |
return
|
79 |
|
|
|
|
|
|
|
|
|
|
|
80 |
# Perform extractive summarization
|
81 |
if radio_selection == "Extractive (TextRank)":
|
82 |
summary = summarize_with_textrank(file_content)
|
@@ -87,22 +92,22 @@ def main():
|
|
87 |
summary = summarize_with_lsa(file_content)
|
88 |
st.session_state.summary = summary
|
89 |
|
90 |
-
# Perform
|
91 |
if radio_selection == "Abstractive (BART Fine-tuned)":
|
92 |
summary = summarize_with_bart_ft(file_content)
|
93 |
st.session_state.summary = summary
|
94 |
|
95 |
-
# Perform
|
96 |
if radio_selection == "Abstractive (BART-large-CNN)":
|
97 |
summary = summarize_with_bart_cnn(file_content)
|
98 |
st.session_state.summary = summary
|
99 |
|
100 |
-
# Perform
|
101 |
if radio_selection == "Abstractive (LongT5)":
|
102 |
summary = summarize_with_t5(file_content)
|
103 |
st.session_state.summary = summary
|
104 |
|
105 |
-
# Perform
|
106 |
if radio_selection == "Abstractive (LED)":
|
107 |
summary = summarize_with_led(file_content)
|
108 |
st.session_state.summary = summary
|
|
|
1 |
import streamlit as st
|
2 |
import re
|
3 |
+
import PyPDF2
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import io
|
6 |
+
from wordcloud import WordCloud
|
7 |
+
from PIL import Image
|
8 |
+
|
9 |
from rouge import Rouge
|
10 |
from datasets import load_dataset
|
|
|
11 |
from extractive_summarization import summarize_with_textrank, summarize_with_lsa
|
12 |
from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
|
13 |
from keyword_extraction import extract_keywords
|
14 |
from keyphrase_extraction import extract_sentences_with_obligations
|
15 |
+
from hybrid_summarization import summarize_hybrid
|
|
|
|
|
|
|
|
|
16 |
|
17 |
+
#-------------------------------------------------------------------#
|
18 |
+
# Load in ToS-Summaries dataset
|
19 |
dataset = load_dataset("EE21/ToS-Summaries")
|
20 |
|
21 |
# Extract titles or identifiers for the ToS
|
22 |
tos_titles = [f"Document {i}" for i in range(len(dataset['train']))]
|
23 |
+
|
|
|
24 |
# Set page to wide mode
|
25 |
st.set_page_config(layout="wide")
|
26 |
|
|
|
41 |
|
42 |
# Left column: Radio buttons for summarizer choice
|
43 |
with col1:
|
44 |
+
radio_options = ["Hybrid (RAKE + BART Fine-tuned)", "Abstractive (LongT5)", "Abstractive (LED)", 'Abstractive (BART Fine-tuned)', "Abstractive (BART-large-CNN)", 'Extractive (TextRank)',
|
45 |
"Extractive (Latent Semantic Analysis)", 'Keyphrase Extraction (RAKE)', 'Keyword Extraction (RAKE)']
|
46 |
|
47 |
help_text = "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. " \
|
|
|
77 |
st.warning("Please upload a PDF, enter some text, or select a document to summarize.")
|
78 |
return
|
79 |
|
80 |
+
# Perform hybrid summarization
|
81 |
+
if radio_selection == "Hybrid (RAKE + BART Fine-tuned)":
|
82 |
+
summary = summarize_hybrid(file_content)
|
83 |
+
st.session_state.summary = summary
|
84 |
+
|
85 |
# Perform extractive summarization
|
86 |
if radio_selection == "Extractive (TextRank)":
|
87 |
summary = summarize_with_textrank(file_content)
|
|
|
92 |
summary = summarize_with_lsa(file_content)
|
93 |
st.session_state.summary = summary
|
94 |
|
95 |
+
# Perform abstractive summarization
|
96 |
if radio_selection == "Abstractive (BART Fine-tuned)":
|
97 |
summary = summarize_with_bart_ft(file_content)
|
98 |
st.session_state.summary = summary
|
99 |
|
100 |
+
# Perform abstractive summarization
|
101 |
if radio_selection == "Abstractive (BART-large-CNN)":
|
102 |
summary = summarize_with_bart_cnn(file_content)
|
103 |
st.session_state.summary = summary
|
104 |
|
105 |
+
# Perform abstractive summarization
|
106 |
if radio_selection == "Abstractive (LongT5)":
|
107 |
summary = summarize_with_t5(file_content)
|
108 |
st.session_state.summary = summary
|
109 |
|
110 |
+
# Perform abstractive summarization
|
111 |
if radio_selection == "Abstractive (LED)":
|
112 |
summary = summarize_with_led(file_content)
|
113 |
st.session_state.summary = summary
|
hybrid_summarization.py
CHANGED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from keyphrase_extraction import extract_sentences_with_obligations
|
2 |
+
from abstractive_summarization import summarize_with_bart_ft
|
3 |
+
|
4 |
+
def summarize_hybrid(text):
    """Run the two-stage hybrid pipeline on *text*.

    The input is first passed to ``extract_sentences_with_obligations``;
    whatever that returns is then handed to ``summarize_with_bart_ft``,
    and the result of that call is returned unchanged.
    """
    # Stage 1 (extraction) feeds directly into stage 2 (BART fine-tuned).
    return summarize_with_bart_ft(extract_sentences_with_obligations(text))
|