Spaces:
Runtime error
Runtime error
import streamlit as st | |
import PyPDF2 | |
from extractive_summarization import summarize_with_textrank, summarize_with_lsa | |
from abstractive_model import summarize_with_bart | |
from keyword_extraction import extract_keywords | |
from keyphrase_extraction import extract_sentences_with_obligations | |
#from blanc import BlancHelp | |
# Use Streamlit's wide layout for the page.
st.set_page_config(
    layout="wide",
)
# Function to handle file upload and return its content | |
def load_pdf(file):
    """Return the concatenated text of every page in the PDF *file*.

    *file* is handed straight to ``PyPDF2.PdfReader``.  A page whose
    ``extract_text()`` result is falsy (``None`` or an empty string)
    contributes an empty string, so the return value is always a ``str``.
    """
    reader = PyPDF2.PdfReader(file)
    # Pages are joined with no separator, exactly as they are extracted.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    return "".join(page_texts)
# Main app | |
def main():
    """Render the Terms of Service Summarizer page and handle one interaction.

    The page has three columns: the summarizer choice (left), the text / PDF
    input with the "Summarize" button (middle) and the most recent result
    (right).  The result is kept in ``st.session_state.summary`` so that it is
    still displayed on runs where the button was not pressed.
    """
    st.title("Terms of Service Summarizer")

    # Single source of truth for the radio labels and their handlers.  The
    # radio options are the keys of this mapping, so a label can never drift
    # away from the function that is supposed to serve it.
    summarizers = {
        "Abstractive": summarize_with_bart,
        "Extractive (TextRank)": summarize_with_textrank,
        "Extractive (Latent Semantic Analysis)": summarize_with_lsa,
        "Keyword Extraction": extract_keywords,
        "Keyphrase Extraction": extract_sentences_with_obligations,
    }

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 3, 2], gap="large")

    # Left column: radio buttons for summarizer choice
    with col1:
        # Tooltips are rendered as Markdown, so paragraphs are separated by a
        # blank line rather than an HTML <br> tag.
        help_text = "\n\n".join(
            [
                "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. "
                "It uses a fine-tuned model on BART-large-CNN.",
                "Extractive: Extractive summarization selects and extracts sentences or phrases directly from the original text to create a summary using the TextRank algorithm.",
                "Keyword Extraction: Keyword extraction identifies and extracts important keywords or terms from the text using the Rake algorithm. "
                "These keywords can be used for various purposes such as content analysis and SEO.",
                "Keyphrase Extraction: Keyphrase extraction is similar to keyword extraction but focuses on identifying multi-word phrases or expressions that are significant in the text using the Rake algorithm.",
            ]
        )
        radio_selection = st.radio(
            "Choose type of summarizer:", list(summarizers), help=help_text
        )

    # Middle column: text input, file uploader and the trigger button
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

        if st.button("Summarize"):
            # Exactly one input source is accepted.  Returning ends this run
            # of main(), so the right column is not rendered in that case.
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return
            if uploaded_file:
                # Extract text from PDF
                file_content = load_pdf(uploaded_file)
                st.write("PDF uploaded successfully.")
            elif user_input:
                file_content = user_input
            else:
                st.warning("Please upload a PDF or enter some text to summarize.")
                return

            # A PDF without a text layer (or whitespace-only input) leaves
            # nothing to summarize; report it instead of calling the model.
            if not file_content.strip():
                st.warning("No text could be extracted from the provided input.")
                return

            # Run the handler that belongs to the selected radio label.
            st.session_state.summary = summarizers[radio_selection](file_content)

    # Right column: show the latest summary, if there is one
    with col3:
        st.write("Summary:")
        if "summary" in st.session_state:
            st.write(st.session_state.summary)
# Start the app only when this file is run as a script, not when imported.
if "__main__" == __name__:
    main()