mrsk1883 committed on
Commit
76e4ec0
·
1 Parent(s): a06b80e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -36
app.py CHANGED
@@ -10,46 +10,48 @@ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
 
12
  def extract_first_sentence(text):
13
- # Use a simple regex to extract the first sentence
14
- sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
15
- if sentences:
16
- return sentences[0]
17
- else:
18
- return text
 
 
19
 
20
  def summarize_pdf_abstract(pdf_file):
21
- try:
22
- reader = PdfReader(pdf_file)
23
- abstract_text = ""
24
- for page in reader.pages:
25
- if "Abstract" in page.extract_text() or "Introduction" in page.extract_text():
26
- abstract_text = page.extract_text()
27
- break
28
-
29
- inputs = tokenizer(abstract_text, return_tensors="pt")
30
- outputs = model.generate(**inputs)
31
- summary = tokenizer.decode(outputs[0])
32
-
33
- # Extract only the first sentence
34
- summary_sentence = extract_first_sentence(summary)
35
-
36
- # Generate audio
37
- speech = gTTS(text=summary_sentence, lang="en")
38
- speech_bytes = BytesIO()
39
- speech.write_to_fp(speech_bytes)
40
-
41
- return {"summary": summary_sentence, "audio": speech_bytes}
42
-
43
- except Exception as e:
44
- raise Exception(str(e))
45
-
46
- def play_audio(audio_bytes):
47
- return gr.Audio(audio_bytes)
48
 
49
  interface = gr.Interface(
50
- fn=summarize_pdf_abstract,
51
- inputs=[gr.File(label="Upload PDF")],
52
- outputs=[gr.Textbox(label="Summary"), play_audio],
53
  )
54
 
55
  interface.launch(share=True)
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
 
12
  def extract_first_sentence(text):
13
+ """
14
+ Extracts the first sentence from the given text.
15
+ """
16
+ sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
17
+ if sentences:
18
+ return sentences[0]
19
+ else:
20
+ return text
21
 
22
  def summarize_pdf_abstract(pdf_file):
23
+ """
24
+ Reads a PDF file, extracts the abstract, summarizes it as the first sentence, and generates audio.
25
+ """
26
+ try:
27
+ reader = PdfReader(pdf_file)
28
+ abstract_text = ""
29
+ for page in reader.pages:
30
+ if "Abstract" in page.extract_text() or "Introduction" in page.extract_text():
31
+ abstract_text = page.extract_text()
32
+ break
33
+
34
+ inputs = tokenizer(abstract_text, return_tensors="pt")
35
+ outputs = model.generate(**inputs)
36
+ summary = tokenizer.decode(outputs[0])
37
+
38
+ # Extract only the first sentence
39
+ summary_sentence = extract_first_sentence(summary)
40
+
41
+ # Generate audio
42
+ speech = gTTS(text=summary_sentence, lang="en")
43
+ speech_bytes = BytesIO()
44
+ speech.write_to_fp(speech_bytes)
45
+
46
+ return {"summary": summary_sentence, "audio": speech_bytes}
47
+
48
+ except Exception as e:
49
+ raise Exception(str(e))
50
 
51
  interface = gr.Interface(
52
+ fn=summarize_pdf_abstract,
53
+ inputs=[gr.File(label="Upload PDF")],
54
+ outputs=[gr.Textbox(label="Summary"), gr.Audio()],
55
  )
56
 
57
  interface.launch(share=True)