Spaces:
Runtime error
Runtime error
File size: 6,336 Bytes
c0ae847 2d2d28b 03ddbfd 50d3158 7a228d7 432c28d 4b1dcc8 cce90fc 46193fd 8e106db 2d2d28b c7464b2 2d2d28b 20d6d68 03ddbfd 20d6d68 2115e8c 20d6d68 03ddbfd 20d6d68 b3bc472 ffc170d 1918f01 3dd36dd bfd0b51 2115e8c 20d6d68 b87bcef 20d6d68 2d2d28b 8e106db 2d2d28b 20d6d68 71f1303 2a61e91 03ddbfd 20d6d68 b87bcef c7464b2 2d2d28b 20d6d68 2d2d28b b87bcef 94d59bf f497f7f 2a61e91 20d6d68 94d59bf 97f7d3e da1f55e 1918f01 da1f55e 7a228d7 ccd2173 97f7d3e b3bc472 eea15da b3bc472 d7485e8 c0ade28 432c28d d7485e8 c0ade28 13e8889 d7485e8 2115e8c 20d6d68 0f4d5d5 cce90fc 97f7d3e 2115e8c 20d6d68 b87bcef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import streamlit as st
from datasets import load_dataset
import PyPDF2
from extractive_summarization import summarize_with_textrank, summarize_with_lsa
from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
from keyword_extraction import extract_keywords
from keyphrase_extraction import extract_sentences_with_obligations
#from blanc import BlancHelp
# Load the Terms of Service dataset once at import time; main() reads
# documents from its 'train' split.
dataset = load_dataset("EE21/ToS-Summaries")

# One display label per row of the 'train' split ("Document 0", "Document 1", ...),
# indexed the same way as the split itself.
_train_row_count = len(dataset['train'])
tos_titles = ["Document {}".format(row_idx) for row_idx in range(_train_row_count)]

# Switch the page to the wide layout before any widget is drawn.
st.set_page_config(layout="wide")
# Function to handle file upload and return its content
def load_pdf(file):
    """Return the text of every page of the PDF *file*, concatenated in order.

    *file* is handed straight to ``PyPDF2.PdfReader``. A page whose
    ``extract_text()`` result is falsy (``None`` or an empty string)
    contributes nothing to the output.
    """
    reader = PyPDF2.PdfReader(file)
    return "".join(page.extract_text() or "" for page in reader.pages)
def _summarizer_table():
    """Map each radio-button label to the function that implements it.

    The insertion order is the order in which the options are shown.
    """
    return {
        "Abstractive (T5)": summarize_with_t5,
        "Abstractive (LED)": summarize_with_led,
        'Abstractive (Fine-tuned BART)': summarize_with_bart_ft,
        "Abstractive (BART-large-CNN)": summarize_with_bart_cnn,
        'Extractive (TextRank)': summarize_with_textrank,
        "Extractive (Latent Semantic Analysis)": summarize_with_lsa,
        'Keyphrase Extraction (RAKE)': extract_sentences_with_obligations,
        'Keyword Extraction (RAKE)': extract_keywords,
    }


def _show_rouge_scores(candidate, reference):
    """Display ROUGE-1/2/L F-scores of *candidate* against *reference*.

    Nothing is shown when either argument is not a non-blank string, since
    ROUGE is only computed here on a pair of text strings.
    """
    if not (isinstance(candidate, str) and isinstance(reference, str)):
        return
    if not candidate.strip() or not reference.strip():
        return

    # NOTE(review): the original code used `Rouge` without importing it.
    # It is imported lazily here so the app still works (minus the scores)
    # when the `rouge` package is not installed -- confirm it is listed in
    # the project's requirements.
    try:
        from rouge import Rouge
    except ImportError:
        st.info("ROUGE scores are unavailable because the 'rouge' package is not installed.")
        return

    scores = Rouge().get_scores(candidate, reference)
    st.write("ROUGE Scores:")
    st.write(f"ROUGE-1: {scores[0]['rouge-1']['f']:.4f}")
    st.write(f"ROUGE-2: {scores[0]['rouge-2']['f']:.4f}")
    st.write(f"ROUGE-L: {scores[0]['rouge-l']['f']:.4f}")


# Main app
def main():
    """Render the three-column summarizer page.

    Left column: choice of summarizer. Middle column: the input (pasted
    text, an uploaded PDF, or a document picked from the dataset) and the
    "Summarize" button. Right column: the latest summary kept in
    ``st.session_state`` and, when that summary was produced from a dataset
    document that carries a 'summary' field, its ROUGE scores.

    Returns early (without drawing the right column) when the input is
    ambiguous or empty, matching the original early-return behaviour.
    """
    st.title("Terms of Service Summarizer")

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 2, 3], gap="large")
    summarizers = _summarizer_table()

    # Left column: Radio buttons for summarizer choice
    with col1:
        radio_options = list(summarizers)
        help_text = (
            "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. "
            "It uses a fine-tuned model on BART-large-CNN.<br>"
            "Extractive: Extractive summarization selects and extracts sentences or phrases directly from the original text to create a summary using the TextRank algorithm.<br>"
            "Keyword Extraction: Keyword extraction identifies and extracts important keywords or terms from the text using the Rake algorithm. "
            "These keywords can be used for various purposes such as content analysis and SEO.<br>"
            "Keyphrase Extraction: Keyphrase extraction is similar to keyword extraction but focuses on identifying multi-word phrases or expressions that are significant in the text using the Rake algorithm."
        )
        radio_selection = st.radio("Choose type of summarizer:", radio_options, help=help_text)

    # Middle column: Text input and File uploader
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

        # Dropdown for selecting the document
        tos_selection_index = st.selectbox(
            "Select a Terms of Service Document",
            range(len(tos_titles)),
            format_func=lambda x: tos_titles[x],
        )

        if st.button("Summarize"):
            # Index of the dataset row the summary is built from, or None
            # when the text came from the text area or a PDF.
            source_index = None

            # The dropdown index must not take part in this check: index 0
            # is falsy and would silently disable the warning.
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return
            elif uploaded_file:
                # Extract text from PDF
                file_content = load_pdf(uploaded_file)
                st.write("PDF uploaded successfully.")
            elif user_input:
                file_content = user_input
            elif tos_selection_index is not None:
                file_content = dataset['train'][tos_selection_index]['plain_text']
                source_index = tos_selection_index
            else:
                st.warning("Please upload a PDF, enter some text, or select a document to summarize.")
                return

            # E.g. a PDF with no extractable text: nothing to summarize.
            if not file_content or not file_content.strip():
                st.warning("No text could be extracted from the provided input.")
                return

            # Run whichever summarizer/extractor the radio button selected.
            st.session_state.summary = summarizers[radio_selection](file_content)
            # Remember where the text came from so the reference summary is
            # only compared with a summary of that very document.
            st.session_state.summary_source_index = source_index

    # Right column: Displaying text after pressing 'Summarize'
    with col3:
        st.write("Summary:")
        if 'summary' in st.session_state:
            st.write(st.session_state.summary)

            source_index = None
            if 'summary_source_index' in st.session_state:
                source_index = st.session_state.summary_source_index

            # Check if a reference summary is available
            if source_index is not None:
                record = dataset['train'][source_index]
                if 'summary' in record:
                    _show_rouge_scores(st.session_state.summary, record['summary'])
# Build the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|