Spaces:

nazianafis
/

Extract-Tables-From-PDF

Running

Change to st.cache_data

fdad087 about 1 year ago

1.15 kB

	import streamlit as st
	import subprocess
	from subprocess import STDOUT, check_call
	import os
	import base64
	import camelot as cam

	@st.cache_data
	def gh():
	    """Start a best-effort, background install of the ``ghostscript`` package.

	    Launches ``apt-get install -y ghostscript`` and returns immediately
	    without waiting for it, so the install may still be running (or may have
	    failed) by the time the rest of the script executes. All output of the
	    child process is discarded.

	    The function is wrapped in ``st.cache_data``; presumably this is so the
	    install is attempted once rather than on every script rerun -- confirm
	    against the Streamlit caching documentation.

	    Returns:
	        None.
	    """
	    try:
	        # Argument list + no shell: nothing here needs shell parsing.
	        # subprocess.DEVNULL replaces a manually opened os.devnull handle
	        # that was never closed.
	        subprocess.Popen(
	            ["apt-get", "install", "-y", "ghostscript"],
	            stdin=None,
	            stdout=subprocess.DEVNULL,
	            stderr=STDOUT,
	        )
	    except OSError:
	        # apt-get could not be launched (e.g. it is not on PATH). The
	        # shell-based invocation failed silently in that situation too, so
	        # keep this step best-effort instead of crashing the app.
	        return

	# Kick off the Ghostscript install helper before any widgets are drawn.
	gh()

	st.title("Extract Tables from PDFs")

	# Upload widget restricted to PDF files; the script later checks the result
	# against None before using it.
	input_pdf = st.file_uploader(
	    label="Upload PDF here",
	    type="pdf",
	)

	st.markdown("### Page Number")

	# Free-text field whose value is handed to camelot's `pages` argument below.
	page_number = st.text_input(
	    "Enter the page # from where you want the table",
	    value=1,
	)

	if input_pdf is not None:
	    # cam.read_pdf is given a file path, so persist the upload to disk first.
	    # getvalue() returns the whole buffer regardless of the current read
	    # position, so a script rerun cannot end up writing an empty file.
	    with open("input.pdf", "wb") as f:
	        f.write(input_pdf.getvalue())

	    # `table` is the collection of tables camelot found on the requested
	    # page(s); it supports len() and integer indexing.
	    table = cam.read_pdf("input.pdf", pages=page_number, flavor="stream")

	    st.markdown("## Number of Tables")
	    st.write(table)

	    if len(table) > 0:
	        # Offer 1-based table numbers (1..n). Starting the range at 0 made
	        # the default choice index table[-1], i.e. the last table, and
	        # listed one option more than there are tables.
	        option = st.selectbox(
	            label="Select the table to be displayed",
	            options=range(1, len(table) + 1),
	        )

	        st.markdown("### Output Table")

	        # Convert the 1-based selection back to a 0-based index.
	        st.dataframe(table[int(option) - 1].df)