# Page residue from the hosting site's file viewer (not part of the program):
#   Sibinraj's picture
#   Update app.py
#   599f557 verified
#   raw
#   history blame
#   2.41 kB
import torch
import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import fitz # PyMuPDF
# Checkpoint identifier handed to from_pretrained below
# (presumably a Hugging Face Hub repo id -- TODO confirm).
model_path = 'Sibinraj/T5-finetuned-dialogue_sumxx'
# Load the model and tokenizer once at module import; summarize_text reads
# both as module-level globals on every call.
model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = T5Tokenizer.from_pretrained(model_path)
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    Pages are read in document order and their text is joined with no
    separator. The document is closed when the ``with`` block exits.
    """
    with fitz.open(pdf_path) as document:
        page_texts = [page.get_text() for page in document]
    return "".join(page_texts)
def _generate_text(prompt, min_length, max_length):
    """Run beam-search generation on *prompt* and return the decoded string.

    The prompt is truncated to 512 tokens. It is deliberately NOT padded:
    a single sequence gains nothing from padding, and the attention mask
    produced by the tokenizer is passed to ``generate`` explicitly instead
    of leaving it to be inferred from pad tokens.

    Args:
        prompt: Text fed to the encoder.
        min_length: Minimum number of generated tokens.
        max_length: Maximum number of generated tokens.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    encoded = tokenizer(
        prompt,
        return_tensors='pt',
        max_length=512,
        truncation=True,
    )
    output_ids = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=max_length,
        min_length=min_length,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def summarize_text(text, max_length, show_length):
    """Summarize *text* into at most *max_length* whitespace-separated words.

    Args:
        text: Source text; it is prefixed with ``"summarize: "`` before
            being passed to the model.
        max_length: Target word count. Coerced to ``int`` because it is used
            as a slice bound and may arrive as a float from a UI widget.
        show_length: When true, a ``(Summary length: N words)`` footer is
            appended to the returned string.

    Returns:
        The summary string, optionally followed by the length footer.
    """
    max_length = int(max_length)

    # The generation budget is in tokens, the target is in words; the extra
    # 20 tokens give headroom before the word-level trim below.
    summary = _generate_text(
        "summarize: " + text,
        min_length=10,
        max_length=max_length + 20,
    )
    summary_words = summary.split()

    if len(summary_words) > max_length:
        summary = ' '.join(summary_words[:max_length])
    elif len(summary_words) < max_length:
        # NOTE(review): this pads a short summary up to max_length words by
        # re-running the model on the summary itself and appending whatever
        # words fall beyond the current length. Kept for compatibility, but
        # the appended words are not guaranteed to continue the summary --
        # confirm this is the intended behavior.
        continuation = _generate_text(
            " ".join(summary_words),
            min_length=max_length,
            max_length=max_length,
        )
        extra_words = continuation.split()[len(summary_words):max_length]
        # Only append when there is something to add, so no stray trailing
        # space is left on the summary.
        if extra_words:
            summary += ' ' + ' '.join(extra_words)

    if show_length:
        summary_length = len(summary.split())
        summary = f"{summary}\n\n(Summary length: {summary_length} words)"
    return summary
def handle_pdf(pdf, max_length, show_length):
    """Summarize an uploaded PDF; entry point wired to the Gradio interface.

    Args:
        pdf: The uploaded file. May be ``None`` when nothing was uploaded,
            a filesystem path string, or an object exposing the path as
            ``.name`` (e.g. a temporary-file wrapper).
        max_length: Target summary length in words, forwarded to
            ``summarize_text``.
        show_length: Whether to append the word-count footer, forwarded to
            ``summarize_text``.

    Returns:
        The summary text, or a short user-facing message when there is no
        file or the PDF yields no text.
    """
    # Submitting the form without a file passes None; report that instead
    # of failing with AttributeError on ``pdf.name``.
    if pdf is None:
        return "Please upload a PDF file."

    # Accept either a plain path string or a file-like wrapper with .name.
    pdf_path = pdf if isinstance(pdf, str) else pdf.name

    text = extract_text_from_pdf(pdf_path)
    # A PDF with no extractable text (e.g. image-only pages) would send an
    # empty prompt to the model; short-circuit with a clear message.
    if not text.strip():
        return "No extractable text was found in the PDF."
    return summarize_text(text, max_length, show_length)
# Input widgets, listed in the same order as handle_pdf's parameters
# (pdf, max_length, show_length).
pdf_upload = gr.File(label='Upload PDF', type='file')
length_slider = gr.Slider(minimum=10, maximum=150, step=1, label='Max Length')
length_toggle = gr.Checkbox(label='Show summary length', value=False)

# Single text output that receives handle_pdf's return value.
summary_box = gr.Textbox(label='Summarized Text')

interface = gr.Interface(
    fn=handle_pdf,
    inputs=[pdf_upload, length_slider, length_toggle],
    outputs=summary_box,
    title='PDF Text Summarizer using T5-finetuned-dialogue_sumxx',
)
interface.launch()