mrsk1883 commited on
Commit
a06b80e
·
1 Parent(s): 73cee55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -2
app.py CHANGED
@@ -1,11 +1,22 @@
1
  import gradio as gr
2
  from PyPDF2 import PdfReader
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 
 
4
 
5
  model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
6
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
 
 
 
 
 
 
 
 
 
9
  def summarize_pdf_abstract(pdf_file):
10
  try:
11
  reader = PdfReader(pdf_file)
@@ -19,15 +30,26 @@ def summarize_pdf_abstract(pdf_file):
19
  outputs = model.generate(**inputs)
20
  summary = tokenizer.decode(outputs[0])
21
 
22
- return {"summary": summary}
 
 
 
 
 
 
 
 
23
 
24
  except Exception as e:
25
  raise Exception(str(e))
26
 
 
 
 
27
  interface = gr.Interface(
28
  fn=summarize_pdf_abstract,
29
  inputs=[gr.File(label="Upload PDF")],
30
- outputs=[gr.Textbox(label="Summary")]
31
  )
32
 
33
  interface.launch(share=True)
 
1
  import gradio as gr
2
  from PyPDF2 import PdfReader
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
+ from gtts import gTTS
5
+ from io import BytesIO
6
+ import re
7
 
8
  model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
9
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
 
12
+ def extract_first_sentence(text):
13
+ # Use a simple regex to extract the first sentence
14
+ sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
15
+ if sentences:
16
+ return sentences[0]
17
+ else:
18
+ return text
19
+
20
  def summarize_pdf_abstract(pdf_file):
21
  try:
22
  reader = PdfReader(pdf_file)
 
30
  outputs = model.generate(**inputs)
31
  summary = tokenizer.decode(outputs[0])
32
 
33
+ # Extract only the first sentence
34
+ summary_sentence = extract_first_sentence(summary)
35
+
36
+ # Generate audio
37
+ speech = gTTS(text=summary_sentence, lang="en")
38
+ speech_bytes = BytesIO()
39
+ speech.write_to_fp(speech_bytes)
40
+
41
+ return {"summary": summary_sentence, "audio": speech_bytes}
42
 
43
  except Exception as e:
44
  raise Exception(str(e))
45
 
46
+ def play_audio(audio_bytes):
47
+ return gr.Audio(audio_bytes)
48
+
49
  interface = gr.Interface(
50
  fn=summarize_pdf_abstract,
51
  inputs=[gr.File(label="Upload PDF")],
52
+ outputs=[gr.Textbox(label="Summary"), play_audio],
53
  )
54
 
55
  interface.launch(share=True)