Spaces:

raannakasturi
/

TextRankSummarizer

Running

App Files Files Community

TextRankSummarizer / app.py

raannakasturi

Update app.py

e670f97 verified 3 months ago

raw

history blame contribute delete

2.67 kB

	from sumy.parsers.plaintext import PlaintextParser
	from sumy.nlp.tokenizers import Tokenizer
	from sumy.summarizers.text_rank import TextRankSummarizer
	from sumy.nlp.stemmers import Stemmer
	from sumy.utils import get_stop_words
	import gradio as gr
	import nltk
	import time

	def textrank_summarizer(text_corpus, sentence_count=25, separator=" "):
	    """Build an extractive summary of ``text_corpus`` with sumy's TextRank.

	    The text is parsed with sumy's English tokenizer, ranked by
	    ``TextRankSummarizer`` (English stemmer and stop words), and the
	    selected sentences are joined into a single string. The elapsed time
	    of a real summarization run is printed to stdout.

	    Args:
	        text_corpus: Plain text to summarize. ``None``, an empty string, or
	            whitespace-only text yields an empty summary.
	        sentence_count: Number of sentences requested from the summarizer.
	            Defaults to 25, the value that used to be hard-coded.
	        separator: String placed between the selected sentences. Defaults
	            to a single space so sentences no longer run into each other;
	            pass ``""`` to get the previous glued-together output.

	    Returns:
	        The summary as one string, or ``""`` when there is nothing to
	        summarize.
	    """
	    # The Clear handler writes None into the input box, so a later click on
	    # Summarize may pass None here; the parser cannot handle that.
	    if not text_corpus or not text_corpus.strip():
	        return ""

	    start_time = time.time()
	    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
	    summarizer = TextRankSummarizer(Stemmer("english"))
	    summarizer.stop_words = get_stop_words("english")
	    sentences = summarizer(parser.document, sentence_count)
	    # join() avoids repeated string concatenation and guarantees the
	    # separator appears only between sentences, never at the end.
	    summary = separator.join(str(sentence) for sentence in sentences)
	    elapsed = time.time() - start_time
	    print(f"Time taken: {elapsed:.2f} seconds")
	    return summary

	def clear_everything(text_corpus, summary):
	    """Return ``(None, None)`` regardless of the values passed in.

	    Both arguments are accepted only so the callback signature matches the
	    two components it receives; their contents are ignored.
	    """
	    cleared_corpus = cleared_summary = None
	    return cleared_corpus, cleared_summary

	# Soft theme with a purple/cyan/slate palette. The font list is a fallback
	# chain: Syne first, then Poppins, then generic families so the UI still
	# renders in a sans-serif face when the Google Fonts cannot be loaded.
	# (Previously Poppins was listed three times and no generic fallback existed.)
	theme = gr.themes.Soft(
	    primary_hue="purple",
	    secondary_hue="cyan",
	    neutral_hue="slate",
	    font=[
	        gr.themes.GoogleFont('Syne'),
	        gr.themes.GoogleFont('Poppins'),
	        "ui-sans-serif",
	        "sans-serif",
	    ],
	)

	# Build the Gradio Blocks UI: an HTML header, a text input, Clear/Summarize
	# buttons, a read-only output area, and the click handlers that connect them.
	# NOTE(review): this copy of the file has lost its indentation, so the
	# nesting of the `with` contexts below (which components sit inside which
	# Row/Column) cannot be determined from here - restore it from the original
	# before running.
	with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
	# Static page header.
	# NOTE(review): the text mentions "PDF documents", but the only input
	# below is a plain text area - confirm the intended wording.
	gr.HTML(
	value ='''
	<h1 style="text-align: center;">TextRank Summarizer</h1>
	<p style="text-align: center;">This app uses a TextRank approach to summarize PDF documents based on CPU.</p>
	<p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
	''')
	with gr.Row():
	with gr.Column():
	# Input: the text to be summarized.
	text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
	with gr.Row():
	clear_btn = gr.Button(value="Clear", variant='stop')
	summarize_btn = gr.Button(value="Summarize", variant='primary')
	# Output: non-editable area that receives the summarizer's return value.
	# NOTE(review): labelled "Raw Data" although it displays the summary.
	summary = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)

	# Summarize: pass the input text to textrank_summarizer and show the result.
	# The event is published in the API under the name "textrank_summarizer"
	# and is given a concurrency limit of 25.
	summarize_btn.click(
	textrank_summarizer,
	inputs=[text_corpus],
	outputs=[summary],
	concurrency_limit=25,
	scroll_to_output=True,
	show_api=True,
	api_name="textrank_summarizer",
	show_progress="full",
	)
	# Clear: clear_everything returns (None, None), which is written to both
	# text areas; this event is not listed in the API (show_api=False).
	clear_btn.click(clear_everything, inputs=[text_corpus, summary], outputs=[text_corpus, summary], show_api=False)

	# Fetch the NLTK tokenizer data quietly before serving
	# (presumably required by sumy's English tokenizer - confirm).
	for nltk_resource in ('punkt', 'punkt_tab'):
	    nltk.download(nltk_resource, quiet=True)

	# Enable the request queue with a default concurrency limit of 25, then
	# start the server on whatever object queue() hands back.
	queued_app = app.queue(default_concurrency_limit=25)
	queued_app.launch(show_api=True, max_threads=500, ssr_mode=False)