Sibinraj committed on
Commit
599f557
1 Parent(s): fa47533

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -1,14 +1,20 @@
1
  import torch
2
  import gradio as gr
3
- from transformers import pipeline
4
  from transformers import T5ForConditionalGeneration, T5Tokenizer
 
5
 
6
  model_path = 'Sibinraj/T5-finetuned-dialogue_sumxx'
7
  model = T5ForConditionalGeneration.from_pretrained(model_path)
8
  tokenizer = T5Tokenizer.from_pretrained(model_path)
9
 
 
 
 
 
 
 
 
10
  def summarize_text(text, max_length, show_length):
11
- # Preprocess the text
12
  inputs = tokenizer.encode(
13
  "summarize: " + text,
14
  return_tensors='pt',
@@ -17,20 +23,17 @@ def summarize_text(text, max_length, show_length):
17
  padding='max_length'
18
  )
19
 
20
- # Generate the summary
21
  summary_ids = model.generate(
22
  inputs,
23
- max_length=max_length + 20, # Allow some buffer
24
- min_length=10, # Set a reasonable minimum length
25
  num_beams=5,
26
  no_repeat_ngram_size=2,
27
  early_stopping=True
28
  )
29
 
30
- # Decode the summary
31
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
32
 
33
- # Trim the summary to the desired length
34
  summary_words = summary.split()
35
  if len(summary_words) > max_length:
36
  summary = ' '.join(summary_words[:max_length])
@@ -46,26 +49,25 @@ def summarize_text(text, max_length, show_length):
46
  additional_summary = tokenizer.decode(additional_tokens[0], skip_special_tokens=True)
47
  summary += ' ' + ' '.join(additional_summary.split()[len(summary_words):max_length])
48
 
49
- # If show_length is True, append the length of the summary
50
  if show_length:
51
  summary_length = len(summary.split())
52
  summary = f"{summary}\n\n(Summary length: {summary_length} words)"
53
 
54
  return summary
55
 
 
 
 
 
56
  interface = gr.Interface(
57
- fn=summarize_text,
58
  inputs=[
59
- gr.Textbox(lines=10, placeholder='Enter Text Here...', label='Input text'),
60
  gr.Slider(minimum=10, maximum=150, step=1, label='Max Length'),
61
- gr.Checkbox(label='Show summary length')
62
  ],
63
  outputs=gr.Textbox(label='Summarized Text'),
64
- title='Text Summarizer using T5-finetuned-dialogue_sumxx',
65
- examples=[
66
- ['The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.', 50, 50],
67
- ['#Person1#: Is this the workshop to prepare for an interview? #Person2#: This is the interview class. Welcome to our class. #Person1#: I am really excited to be taking this workshop so that I can get ready for my interview next week. #Person2#: We are all learning things that will help us in our interview. What do you think are some important considerations going into your interview? #Person1#: I think that we should dress neatly and appropriately. #Person2#: Yes. Second, as you can imagine, attitude and friendliness go a long way. #Person1#: Yes, and I always feel much better when I am friendly. #Person2#: Believe it or not, the interviewers are as interested in your questions as they are in your answers. #Person1#: Any more hints as to what I should do in an interview? #Person2#: Always be honest with your answers. The interviewers really do want to know if you will be a good fit for them.', 50, 50]
68
- ]
69
  )
70
 
71
  interface.launch()
 
1
  import torch
2
  import gradio as gr
 
3
  from transformers import T5ForConditionalGeneration, T5Tokenizer
4
+ import fitz # PyMuPDF
5
 
6
  # Identifier handed to `from_pretrained` for both the model and the tokenizer
  # (looks like a Hugging Face Hub repo id -- confirm it is not a local path).
  model_path = 'Sibinraj/T5-finetuned-dialogue_sumxx'
7
  # Loaded once at import time so every request reuses the same model object.
  model = T5ForConditionalGeneration.from_pretrained(model_path)
8
  # Tokenizer is loaded from the same identifier as the model above.
  tokenizer = T5Tokenizer.from_pretrained(model_path)
9
 
10
def extract_text_from_pdf(pdf_path) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages joined in page order. The result is an empty
        string when no page yields any text.
    """
    # The context manager closes the document even if reading a page fails.
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string page by page.
        return "".join(page.get_text() for page in doc)
16
+
17
  def summarize_text(text, max_length, show_length):
 
18
  inputs = tokenizer.encode(
19
  "summarize: " + text,
20
  return_tensors='pt',
 
23
  padding='max_length'
24
  )
25
 
 
26
  summary_ids = model.generate(
27
  inputs,
28
+ max_length=max_length + 20,
29
+ min_length=10,
30
  num_beams=5,
31
  no_repeat_ngram_size=2,
32
  early_stopping=True
33
  )
34
 
 
35
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
36
 
 
37
  summary_words = summary.split()
38
  if len(summary_words) > max_length:
39
  summary = ' '.join(summary_words[:max_length])
 
49
  additional_summary = tokenizer.decode(additional_tokens[0], skip_special_tokens=True)
50
  summary += ' ' + ' '.join(additional_summary.split()[len(summary_words):max_length])
51
 
 
52
  if show_length:
53
  summary_length = len(summary.split())
54
  summary = f"{summary}\n\n(Summary length: {summary_length} words)"
55
 
56
  return summary
57
 
58
def handle_pdf(pdf, max_length, show_length):
    """Summarize the text of an uploaded PDF.

    Args:
        pdf: Value delivered by the upload component. Either a string path or
            an object exposing the path as ``.name``; ``None`` when nothing
            was uploaded.
        max_length: Forwarded unchanged to ``summarize_text``.
        show_length: Forwarded unchanged to ``summarize_text``.

    Returns:
        The string produced by ``summarize_text``, or a short user-facing
        message when there is no file or no extractable text.
    """
    # Submitting the form without a file hands us None; answer with a hint
    # instead of failing on the attribute access below.
    if pdf is None:
        return "Please upload a PDF file."

    # Accept both a plain path string and a file-like wrapper with `.name`.
    pdf_path = pdf if isinstance(pdf, str) else pdf.name

    text = extract_text_from_pdf(pdf_path)
    # Without any text the model would only see the bare task prefix.
    if not text.strip():
        return "No text could be extracted from the uploaded PDF."

    return summarize_text(text, max_length, show_length)
61
+
62
  interface = gr.Interface(
63
+ fn=handle_pdf,
64
  inputs=[
65
+ gr.File(label='Upload PDF', type='file'),
66
  gr.Slider(minimum=10, maximum=150, step=1, label='Max Length'),
67
+ gr.Checkbox(label='Show summary length', value=False)
68
  ],
69
  outputs=gr.Textbox(label='Summarized Text'),
70
+ title='PDF Text Summarizer using T5-finetuned-dialogue_sumxx'
 
 
 
 
71
  )
72
 
73
  interface.launch()