eHemink committed on
Commit
72abb26
·
1 Parent(s): 11b8b80

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app.py.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1zk7xuWSf7ii7zowOqNVLy0FwXYVHYE2V
8
+ """
9
+
10
+ #imports
11
+ !pip install PyPDF2
12
+ import PyPDF2
13
+ import re
14
+ !pip install transformers
15
+ import transformers
16
+ from transformers import pipeline
17
+ !pip install git+https://github.com/suno-ai/bark.git
18
+ from bark import SAMPLE_RATE, generate_audio, preload_models
19
+ from scipy.io.wavfile import write as write_wav
20
+ from IPython.display import Audio
21
+
22
def abstract_to_audio(insert_pdf):
    """Summarize a PDF article's abstract and synthesize the summary as speech.

    Args:
        insert_pdf: The uploaded PDF, given either as a filesystem path or as
            a file-like wrapper that exposes its path as ``.name``.
            NOTE(review): which of the two Gradio passes for ``inputs='file'``
            depends on the installed Gradio version -- both are handled.

    Returns:
        A ``(sample_rate, audio_array)`` tuple built from Bark's
        ``SAMPLE_RATE`` and the generated waveform, which is the form a
        Gradio ``'audio'`` output consumes.

    Raises:
        ValueError: If no file was provided, or if no text could be found
            after an "Abstract" heading in the PDF.
    """
    if insert_pdf is None:
        raise ValueError("No PDF file was provided.")

    # open() needs a path; unwrap temp-file wrappers, pass plain paths through.
    pdf_path = getattr(insert_pdf, "name", insert_pdf)

    abstract = _extract_abstract(pdf_path)
    if not abstract:
        # Summarizing an empty string would only produce meaningless audio.
        raise ValueError("No abstract could be found in the uploaded PDF.")

    # Creating a summarization pipeline
    model = "lidiya/bart-large-xsum-samsum"
    summarizer = pipeline(task="summarization", model=model)

    # Summarizing the extracted abstract. truncation=True keeps an over-long
    # extraction (e.g. when no "Introduction" heading bounded it) within the
    # model's maximum input length instead of failing.
    summarized = summarizer(abstract, truncation=True)
    tts_prompt = summarized[0]['summary_text']
    print(tts_prompt)

    # Generate audio that speaks the summary using Bark
    # download and load all models
    preload_models()

    # generate audio from text
    audio_array = generate_audio(tts_prompt)

    # Hand Gradio the raw waveform; an IPython display object only renders
    # inside a notebook and is not a valid value for an 'audio' output.
    return SAMPLE_RATE, audio_array


def _extract_abstract(pdf_file):
    """Return the stripped abstract text of the PDF at ``pdf_file``, or ''.

    Pages are scanned in order and the first page containing the word
    "Abstract" decides the result; later pages are not examined.
    """
    # Open the PDF file in read-binary mode
    with open(pdf_file, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            # Treat a page without extractable text as empty.
            text = page.extract_text() or ''
            abstract_text = _slice_abstract(text)
            if abstract_text is not None:
                return abstract_text.strip()
    return ''


def _slice_abstract(text):
    """Return the text following "Abstract" in ``text``, or None if absent.

    The slice ends just before the next "Introduction" heading when one is
    present after the match, and otherwise runs to the end of ``text``.
    """
    # Use regular expression to find the "Abstract" section
    abstract_match = re.search(r'\bAbstract\b', text, re.IGNORECASE)
    if abstract_match is None:
        return None

    start_index = abstract_match.end()
    next_section_match = re.search(r'\bIntroduction\b', text[start_index:])
    if next_section_match is None:
        # If no next section found, extract text till the end
        return text[start_index:]
    return text[start_index:start_index + next_section_match.start()]
77
+
78
# NOTE: gradio must be installed ahead of time (e.g. listed in
# requirements.txt). The notebook-only "!pip install gradio" shell magic is
# not valid Python and would make a plain app.py fail with a SyntaxError.
import gradio as gr

# Web UI: takes an uploaded file and plays back the audio returned by
# abstract_to_audio.
my_app = gr.Interface(fn=abstract_to_audio, inputs='file', outputs='audio')
my_app.launch(share=True)