Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -39,14 +39,14 @@ def extract_text(file_path):
|
|
39 |
text = ""
|
40 |
image_text = ""
|
41 |
with st.spinner(text="Extracting text from file..."):
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
|
51 |
images = convert_from_path(file_path) # Convert PDF pages to images
|
52 |
for i, image in enumerate(images):
|
|
|
39 |
text = ""
|
40 |
image_text = ""
|
41 |
with st.spinner(text="Extracting text from file..."):
|
42 |
+
with open(file_path, "rb") as pdf_file:
|
43 |
+
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
44 |
+
num_pages = len(pdf_reader.pages)
|
45 |
+
|
46 |
+
for page_number in range(num_pages):
|
47 |
+
# st.write(f"Page {page_number + 1}")
|
48 |
+
page = pdf_reader.pages[page_number]
|
49 |
+
text += page.extract_text()
|
50 |
|
51 |
images = convert_from_path(file_path) # Convert PDF pages to images
|
52 |
for i, image in enumerate(images):
|