kxx-kkk committed on
Commit
4ef7de6
1 Parent(s): e07e7c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -39,14 +39,14 @@ def extract_text(file_path):
39
  text = ""
40
  image_text = ""
41
  with st.spinner(text="Extracting text from file..."):
42
- # with open(file_path, "rb") as pdf_file:
43
- # pdf_reader = PyPDF2.PdfReader(pdf_file)
44
- # num_pages = len(pdf_reader.pages)
45
-
46
- # for page_number in range(num_pages):
47
- # # st.write(f"Page {page_number + 1}")
48
- # page = pdf_reader.pages[page_number]
49
- # text += page.extract_text()
50
 
51
  images = convert_from_path(file_path) # Convert PDF pages to images
52
  for i, image in enumerate(images):
 
39
  text = ""
40
  image_text = ""
41
  with st.spinner(text="Extracting text from file..."):
42
+ with open(file_path, "rb") as pdf_file:
43
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
44
+ num_pages = len(pdf_reader.pages)
45
+
46
+ for page_number in range(num_pages):
47
+ # st.write(f"Page {page_number + 1}")
48
+ page = pdf_reader.pages[page_number]
49
+ text += page.extract_text()
50
 
51
  images = convert_from_path(file_path) # Convert PDF pages to images
52
  for i, image in enumerate(images):