|
from sumy.parsers.plaintext import PlaintextParser |
|
from sumy.nlp.tokenizers import Tokenizer |
|
from sumy.summarizers.text_rank import TextRankSummarizer |
|
from sumy.nlp.stemmers import Stemmer |
|
from sumy.utils import get_stop_words |
|
import gradio as gr |
|
import nltk |
|
import time |
|
|
|
def textrank_summarizer(text_corpus, sentence_count=25):
    """Summarize English text with sumy's TextRank summarizer.

    Args:
        text_corpus: Plain text to summarize. ``None``, empty, or
            whitespace-only input produces an empty summary.
        sentence_count: Sentence count handed to the summarizer. Defaults to
            25, the value that used to be hard-coded here.

    Returns:
        The sentences selected by the summarizer joined with single spaces,
        or ``""`` when there is nothing to summarize.
    """
    # Bail out before building any NLP objects when there is no text.
    # NOTE(review): the Clear handler resets the input box to None, so a None
    # value may reach this function afterwards -- confirm against Gradio.
    if not text_corpus or not text_corpus.strip():
        return ""

    # perf_counter is monotonic, so the elapsed time cannot go negative if
    # the wall clock is adjusted mid-run.
    start_time = time.perf_counter()

    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
    summarizer = TextRankSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")

    # The previous loop appended "" after each sentence, which glued the
    # sentences together with no whitespace; join with a space instead.
    summary = " ".join(
        str(sentence) for sentence in summarizer(parser.document, sentence_count)
    )

    elapsed = time.perf_counter() - start_time
    print(f"Time taken: {elapsed:.2f} seconds")

    return summary
|
|
|
def clear_everything(text_corpus, summary):
    """Return a ``(None, None)`` pair, ignoring both arguments.

    The Clear button passes the current contents of the two text areas and
    routes the returned pair back to those same two components.

    Args:
        text_corpus: Current value of the input text area (unused).
        summary: Current value of the output text area (unused).

    Returns:
        A 2-tuple ``(None, None)``, one entry per output component.
    """
    cleared_corpus = cleared_summary = None
    return cleared_corpus, cleared_summary
|
|
|
# Visual theme for the app: Gradio's Soft theme with custom hues and fonts.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    # Font stack in priority order. "Poppins" used to be listed three times;
    # the repeats added nothing, so each web font now appears once. The plain
    # "sans-serif" entry is a generic fallback for when the Google Fonts
    # cannot be loaded.
    font=[
        gr.themes.GoogleFont("Syne"),
        gr.themes.GoogleFont("Poppins"),
        "sans-serif",
    ],
)
|
|
|
# Page layout and event wiring for the summarizer UI.
# NOTE(review): the indentation of this section was lost in the source, so the
# nesting below is reconstructed -- in particular, confirm that the output
# text area belongs inside the column rather than beside it in the outer row.
with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
    # Static header shown at the top of the page.
    # NOTE(review): the copy mentions PDF documents, but the only input below
    # is a pasted-text area -- confirm the wording is intended.
    gr.HTML(
        value='''
        <h1 style="text-align: center;">TextRank Summarizer</h1>
        <p style="text-align: center;">This app uses a TextRank approach to summarize PDF documents based on CPU.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
        '''
    )

    with gr.Row():
        with gr.Column():
            # Input: the text to be summarized.
            text_corpus = gr.TextArea(
                label="Text Corpus",
                placeholder="Paste the text corpus here",
                lines=5,
            )

            # Action buttons, laid out side by side.
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant="stop")
                summarize_btn = gr.Button(value="Summarize", variant="primary")

            # Output: read-only area that receives the summary.
            # NOTE(review): labelled "Raw Data" although it shows the summary.
            summary = gr.TextArea(
                label="Raw Data",
                placeholder="The generated raw data will be displayed here",
                lines=7,
                interactive=False,
                show_copy_button=True,
            )

    # Summarize: run the summarizer on the input and show the result. This
    # handler is also exposed through the API as "textrank_summarizer".
    summarize_btn.click(
        textrank_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        api_name="textrank_summarizer",
        show_api=True,
        show_progress="full",
        scroll_to_output=True,
        concurrency_limit=25,
    )

    # Clear: reset both text areas; hidden from the API listing.
    clear_btn.click(
        clear_everything,
        inputs=[text_corpus, summary],
        outputs=[text_corpus, summary],
        show_api=False,
    )
|
|
|
# Fetch the NLTK tokenizer data quietly before serving requests
# (presumably what sumy's English Tokenizer relies on -- confirm).
for nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(nltk_resource, quiet=True)

# Enable request queuing, then start the server. The original chained these
# two calls; queue() is expected to return the same Blocks object, so calling
# them separately on `app` is equivalent.
app.queue(default_concurrency_limit=25)
app.launch(show_api=True, max_threads=500, ssr_mode=False)
|
|