Spaces:
Sleeping
Sleeping
missing first page fixed
Browse files
app.py
CHANGED
@@ -156,17 +156,17 @@ if uploaded_locked_pdf_file is not None:
|
|
156 |
if 'page_count' in st.session_state:
|
157 |
st.write(f"total page count = {st.session_state.page_count}")
|
158 |
if 'num_pages_to_extract'not in st.session_state:
|
159 |
-
st.session_state.
|
160 |
else:
|
161 |
-
st.session_state.
|
162 |
|
163 |
if 'num_pages_to_extract2'not in st.session_state:
|
164 |
-
st.session_state.
|
165 |
else:
|
166 |
-
st.session_state.
|
167 |
|
168 |
|
169 |
-
st.write(f"num of pages to extract {st.session_state.
|
170 |
if 'run_button' in st.session_state and st.session_state.run_button == True:
|
171 |
st.session_state.running = True
|
172 |
else:
|
@@ -182,12 +182,12 @@ if 'page_count' in st.session_state:
|
|
182 |
pdf_tables_image_list=[]
|
183 |
st.session_state.pdf_text_list=[]
|
184 |
|
185 |
-
for page_number in range(st.session_state.
|
186 |
-
st.session_state.
|
187 |
# print(f"page_number={page_number}")
|
188 |
image = pdf2image.convert_from_path(st.session_state.uploaded_pdf_path, first_page=page_number+1, last_page=page_number+1)
|
189 |
st.session_state.color_image_list.append(image[0])
|
190 |
-
progress_percentage = (page_number-st.session_state.
|
191 |
read_pdf_progress_bar.progress(progress_percentage)
|
192 |
read_pdf_progress_bar.progress(0)
|
193 |
for index, image in enumerate(st.session_state.color_image_list):
|
@@ -197,22 +197,22 @@ if 'page_count' in st.session_state:
|
|
197 |
# uncomment to find xy coordinates
|
198 |
# cv2.imwrite(f"gray_image_{index}.png", gray_image_np)
|
199 |
# st.image(Image.fromarray(gray_image_np))
|
200 |
-
if index + st.session_state.
|
201 |
cv2.line(gray_image_np, (223, 414), (223, 1185), 0, 2)
|
202 |
cv2.line(gray_image_np, (1527, 414), (1527, 1185), 0, 2)
|
203 |
-
if index + st.session_state.
|
204 |
cv2.line(gray_image_np, (176, 248), (176, 1760), 0, 2)
|
205 |
cv2.line(gray_image_np, (1551, 248), (1551, 1760), 0, 2)
|
206 |
-
if index + st.session_state.
|
207 |
cv2.line(gray_image_np, (225, 229), (225, 1703), 0, 2)
|
208 |
cv2.line(gray_image_np, (1601, 229), (1601, 1703), 0, 2)
|
209 |
-
if index + st.session_state.
|
210 |
cv2.line(gray_image_np, (173, 227), (173, 1790), 0, 2)
|
211 |
cv2.line(gray_image_np, (1550, 227), (1550, 1790), 0, 2)
|
212 |
-
if index + st.session_state.
|
213 |
cv2.line(gray_image_np, (222, 227), (222, 1444), 0, 2)
|
214 |
cv2.line(gray_image_np, (1600, 227), (1600, 1444), 0, 2)
|
215 |
-
if index + st.session_state.
|
216 |
cv2.line(gray_image_np, (175, 227), (175, 1229), 0, 2)
|
217 |
cv2.line(gray_image_np, (1551, 227), (1551, 1229), 0, 2)
|
218 |
# st.image(Image.fromarray(gray_image_np))
|
|
|
156 |
if 'page_count' in st.session_state:
|
157 |
st.write(f"total page count = {st.session_state.page_count}")
|
158 |
if 'num_pages_to_extract'not in st.session_state:
|
159 |
+
st.session_state.start_page_to_extract = st.slider('Start page number:', min_value=1, max_value=st.session_state.page_count - 1, value=1, key='num_pages_to_extract_slider')
|
160 |
else:
|
161 |
+
st.session_state.start_page_to_extract = st.slider('Start page number:', min_value=1, max_value=st.session_state.page_count - 1, value=st.session_state.start_page_to_extract, key='num_pages_to_extract_slider')
|
162 |
|
163 |
if 'num_pages_to_extract2'not in st.session_state:
|
164 |
+
st.session_state.end_page_to_extract = st.slider('End page number', min_value=1, max_value=st.session_state.page_count - 1, value=st.session_state.page_count - 1, key='num_pages_to_extract_slider2')
|
165 |
else:
|
166 |
+
st.session_state.end_page_to_extract = st.slider('End Page number', min_value=1, max_value=st.session_state.page_count - 1, value=st.session_state.end_page_to_extract, key='num_pages_to_extract_slider2')
|
167 |
|
168 |
|
169 |
+
st.write(f"num of pages to extract {st.session_state.start_page_to_extract}")
|
170 |
if 'run_button' in st.session_state and st.session_state.run_button == True:
|
171 |
st.session_state.running = True
|
172 |
else:
|
|
|
182 |
pdf_tables_image_list=[]
|
183 |
st.session_state.pdf_text_list=[]
|
184 |
|
185 |
+
for page_number in range(st.session_state.start_page_to_extract-1,
|
186 |
+
st.session_state.end_page_to_extract + 1):
|
187 |
# print(f"page_number={page_number}")
|
188 |
image = pdf2image.convert_from_path(st.session_state.uploaded_pdf_path, first_page=page_number+1, last_page=page_number+1)
|
189 |
st.session_state.color_image_list.append(image[0])
|
190 |
+
progress_percentage = (page_number-st.session_state.start_page_to_extract+1) / (st.session_state.end_page_to_extract - st.session_state.start_page_to_extract+1)
|
191 |
read_pdf_progress_bar.progress(progress_percentage)
|
192 |
read_pdf_progress_bar.progress(0)
|
193 |
for index, image in enumerate(st.session_state.color_image_list):
|
|
|
197 |
# uncomment to find xy coordinates
|
198 |
# cv2.imwrite(f"gray_image_{index}.png", gray_image_np)
|
199 |
# st.image(Image.fromarray(gray_image_np))
|
200 |
+
if index + st.session_state.start_page_to_extract == 34:
|
201 |
cv2.line(gray_image_np, (223, 414), (223, 1185), 0, 2)
|
202 |
cv2.line(gray_image_np, (1527, 414), (1527, 1185), 0, 2)
|
203 |
+
if index + st.session_state.start_page_to_extract == 35:
|
204 |
cv2.line(gray_image_np, (176, 248), (176, 1760), 0, 2)
|
205 |
cv2.line(gray_image_np, (1551, 248), (1551, 1760), 0, 2)
|
206 |
+
if index + st.session_state.start_page_to_extract == 36:
|
207 |
cv2.line(gray_image_np, (225, 229), (225, 1703), 0, 2)
|
208 |
cv2.line(gray_image_np, (1601, 229), (1601, 1703), 0, 2)
|
209 |
+
if index + st.session_state.start_page_to_extract == 37:
|
210 |
cv2.line(gray_image_np, (173, 227), (173, 1790), 0, 2)
|
211 |
cv2.line(gray_image_np, (1550, 227), (1550, 1790), 0, 2)
|
212 |
+
if index + st.session_state.start_page_to_extract == 38:
|
213 |
cv2.line(gray_image_np, (222, 227), (222, 1444), 0, 2)
|
214 |
cv2.line(gray_image_np, (1600, 227), (1600, 1444), 0, 2)
|
215 |
+
if index + st.session_state.start_page_to_extract == 39:
|
216 |
cv2.line(gray_image_np, (175, 227), (175, 1229), 0, 2)
|
217 |
cv2.line(gray_image_np, (1551, 227), (1551, 1229), 0, 2)
|
218 |
# st.image(Image.fromarray(gray_image_np))
|