zmbfeng committed on
Commit
1832c6c
·
1 Parent(s): d0bccec

missing first page fixed

Browse files
Files changed (1) hide show
  1. app.py +14 -14
app.py CHANGED
@@ -156,17 +156,17 @@ if uploaded_locked_pdf_file is not None:
156
  if 'page_count' in st.session_state:
157
  st.write(f"total page count = {st.session_state.page_count}")
158
  if 'num_pages_to_extract'not in st.session_state:
159
- st.session_state.num_pages_to_extract = st.slider('Start page number:', min_value=1, max_value=st.session_state.page_count-1, value=1, key='num_pages_to_extract_slider')
160
  else:
161
- st.session_state.num_pages_to_extract = st.slider('Start page number:', min_value=1, max_value=st.session_state.page_count-1, value=st.session_state.num_pages_to_extract , key='num_pages_to_extract_slider')
162
 
163
  if 'num_pages_to_extract2'not in st.session_state:
164
- st.session_state.num_pages_to_extract2 = st.slider('End page number', min_value=1, max_value=st.session_state.page_count-1, value=st.session_state.page_count-1, key='num_pages_to_extract_slider2')
165
  else:
166
- st.session_state.num_pages_to_extract2 = st.slider('End Page number', min_value=1, max_value=st.session_state.page_count-1, value=st.session_state.num_pages_to_extract2 , key='num_pages_to_extract_slider2')
167
 
168
 
169
- st.write(f"num of pages to extract {st.session_state.num_pages_to_extract}")
170
  if 'run_button' in st.session_state and st.session_state.run_button == True:
171
  st.session_state.running = True
172
  else:
@@ -182,12 +182,12 @@ if 'page_count' in st.session_state:
182
  pdf_tables_image_list=[]
183
  st.session_state.pdf_text_list=[]
184
 
185
- for page_number in range(st.session_state.num_pages_to_extract,
186
- st.session_state.num_pages_to_extract2+1):
187
  # print(f"page_number={page_number}")
188
  image = pdf2image.convert_from_path(st.session_state.uploaded_pdf_path, first_page=page_number+1, last_page=page_number+1)
189
  st.session_state.color_image_list.append(image[0])
190
- progress_percentage = (page_number-st.session_state.num_pages_to_extract) / (st.session_state.num_pages_to_extract2-st.session_state.num_pages_to_extract)
191
  read_pdf_progress_bar.progress(progress_percentage)
192
  read_pdf_progress_bar.progress(0)
193
  for index, image in enumerate(st.session_state.color_image_list):
@@ -197,22 +197,22 @@ if 'page_count' in st.session_state:
197
  # uncomment to find xy coordinates
198
  # cv2.imwrite(f"gray_image_{index}.png", gray_image_np)
199
  # st.image(Image.fromarray(gray_image_np))
200
- if index + st.session_state.num_pages_to_extract == 34:
201
  cv2.line(gray_image_np, (223, 414), (223, 1185), 0, 2)
202
  cv2.line(gray_image_np, (1527, 414), (1527, 1185), 0, 2)
203
- if index + st.session_state.num_pages_to_extract == 35:
204
  cv2.line(gray_image_np, (176, 248), (176, 1760), 0, 2)
205
  cv2.line(gray_image_np, (1551, 248), (1551, 1760), 0, 2)
206
- if index + st.session_state.num_pages_to_extract == 36:
207
  cv2.line(gray_image_np, (225, 229), (225, 1703), 0, 2)
208
  cv2.line(gray_image_np, (1601, 229), (1601, 1703), 0, 2)
209
- if index + st.session_state.num_pages_to_extract == 37:
210
  cv2.line(gray_image_np, (173, 227), (173, 1790), 0, 2)
211
  cv2.line(gray_image_np, (1550, 227), (1550, 1790), 0, 2)
212
- if index + st.session_state.num_pages_to_extract == 38:
213
  cv2.line(gray_image_np, (222, 227), (222, 1444), 0, 2)
214
  cv2.line(gray_image_np, (1600, 227), (1600, 1444), 0, 2)
215
- if index + st.session_state.num_pages_to_extract == 39:
216
  cv2.line(gray_image_np, (175, 227), (175, 1229), 0, 2)
217
  cv2.line(gray_image_np, (1551, 227), (1551, 1229), 0, 2)
218
  # st.image(Image.fromarray(gray_image_np))
 
156
  if 'page_count' in st.session_state:
157
  st.write(f"total page count = {st.session_state.page_count}")
158
  if 'num_pages_to_extract'not in st.session_state:
159
+ st.session_state.start_page_to_extract = st.slider('Start page number:', min_value=1, max_value=st.session_state.page_count - 1, value=1, key='num_pages_to_extract_slider')
160
  else:
161
+ st.session_state.start_page_to_extract = st.slider('Start page number:', min_value=1, max_value=st.session_state.page_count - 1, value=st.session_state.start_page_to_extract, key='num_pages_to_extract_slider')
162
 
163
  if 'num_pages_to_extract2'not in st.session_state:
164
+ st.session_state.end_page_to_extract = st.slider('End page number', min_value=1, max_value=st.session_state.page_count - 1, value=st.session_state.page_count - 1, key='num_pages_to_extract_slider2')
165
  else:
166
+ st.session_state.end_page_to_extract = st.slider('End Page number', min_value=1, max_value=st.session_state.page_count - 1, value=st.session_state.end_page_to_extract, key='num_pages_to_extract_slider2')
167
 
168
 
169
+ st.write(f"num of pages to extract {st.session_state.start_page_to_extract}")
170
  if 'run_button' in st.session_state and st.session_state.run_button == True:
171
  st.session_state.running = True
172
  else:
 
182
  pdf_tables_image_list=[]
183
  st.session_state.pdf_text_list=[]
184
 
185
+ for page_number in range(st.session_state.start_page_to_extract-1,
186
+ st.session_state.end_page_to_extract + 1):
187
  # print(f"page_number={page_number}")
188
  image = pdf2image.convert_from_path(st.session_state.uploaded_pdf_path, first_page=page_number+1, last_page=page_number+1)
189
  st.session_state.color_image_list.append(image[0])
190
+ progress_percentage = (page_number-st.session_state.start_page_to_extract+1) / (st.session_state.end_page_to_extract - st.session_state.start_page_to_extract+1)
191
  read_pdf_progress_bar.progress(progress_percentage)
192
  read_pdf_progress_bar.progress(0)
193
  for index, image in enumerate(st.session_state.color_image_list):
 
197
  # uncomment to find xy coordinates
198
  # cv2.imwrite(f"gray_image_{index}.png", gray_image_np)
199
  # st.image(Image.fromarray(gray_image_np))
200
+ if index + st.session_state.start_page_to_extract == 34:
201
  cv2.line(gray_image_np, (223, 414), (223, 1185), 0, 2)
202
  cv2.line(gray_image_np, (1527, 414), (1527, 1185), 0, 2)
203
+ if index + st.session_state.start_page_to_extract == 35:
204
  cv2.line(gray_image_np, (176, 248), (176, 1760), 0, 2)
205
  cv2.line(gray_image_np, (1551, 248), (1551, 1760), 0, 2)
206
+ if index + st.session_state.start_page_to_extract == 36:
207
  cv2.line(gray_image_np, (225, 229), (225, 1703), 0, 2)
208
  cv2.line(gray_image_np, (1601, 229), (1601, 1703), 0, 2)
209
+ if index + st.session_state.start_page_to_extract == 37:
210
  cv2.line(gray_image_np, (173, 227), (173, 1790), 0, 2)
211
  cv2.line(gray_image_np, (1550, 227), (1550, 1790), 0, 2)
212
+ if index + st.session_state.start_page_to_extract == 38:
213
  cv2.line(gray_image_np, (222, 227), (222, 1444), 0, 2)
214
  cv2.line(gray_image_np, (1600, 227), (1600, 1444), 0, 2)
215
+ if index + st.session_state.start_page_to_extract == 39:
216
  cv2.line(gray_image_np, (175, 227), (175, 1229), 0, 2)
217
  cv2.line(gray_image_np, (1551, 227), (1551, 1229), 0, 2)
218
  # st.image(Image.fromarray(gray_image_np))