# File size: 4,046 Bytes
# 3849c89
import base64
import hashlib
import requests
import os
import streamlit as st
# Base URL and port of the Bookshine API that the process_* functions call.
# Both values can be overridden through environment variables of the same
# name (e.g. BOOKSHINE_HOST=https://api.bookshine.at, the alternative host
# that used to be kept here as a commented-out assignment). The defaults
# target a server on the local machine. The port is kept as a string because
# it is only ever interpolated into the request URL.
BOOKSHINE_HOST = os.environ.get("BOOKSHINE_HOST", "http://localhost")
BOOKSHINE_PORT = os.environ.get("BOOKSHINE_PORT", "4690")
# Accepted access codes, read once at import time from the comma-separated
# PASSWORDS environment variable. Surrounding whitespace is stripped and blank
# entries are dropped: "".split(",") yields [""], which would otherwise make
# the empty string a valid access code whenever the variable is unset or
# contains a stray comma.
PASSWORDS = [
    code.strip()
    for code in os.environ.get("PASSWORDS", "").split(",")
    if code.strip()
]


def password_is_correct(password):
    """Return True if *password* is one of the configured access codes.

    An empty (or None) password is always rejected, even if PASSWORDS
    happens to contain an empty entry.
    """
    return bool(password) and password in PASSWORDS
def process_docx(uploaded_file):
    """Send an uploaded .docx file to the Bookshine API and render the result.

    The file content is posted base64-encoded to the ``/process_md``
    endpoint. On success a download button for the returned document and the
    returned HTML diff are written to the Streamlit page. On any failure
    (network error, timeout, HTTP error status, malformed response) a generic
    error message is shown instead of letting the exception crash the app.

    Args:
        uploaded_file: File-like object with ``read()`` and a ``name``
            attribute (as returned by ``st.file_uploader``).

    Returns:
        None.
    """
    # The payload carries the file as base64 text inside a JSON body.
    encoded_file = base64.b64encode(uploaded_file.read()).decode('utf-8')
    payload = {
        "filename": uploaded_file.name,
        "attachment_b64": encoded_file,
        "include_diff": True,
        "include_docx": True,
        "include_commas": True,
        "is_demo": False,
    }

    try:
        response = requests.post(
            f'{BOOKSHINE_HOST}:{BOOKSHINE_PORT}/process_md',
            json=payload,
            # (connect, read) seconds: fail fast if the server is unreachable,
            # but allow a long processing time before giving up.
            timeout=(10, 600),
        )
        response.raise_for_status()
        result = response.json()
        decoded_file = base64.b64decode(result['docx'])
        diff = result['diff']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # ValueError covers invalid JSON and invalid base64; KeyError and
        # TypeError cover a response that lacks the expected fields.
        st.error('Es ist ein Fehler aufgetreten')
        return

    # Make the corrected docx available for download. Only the final
    # extension is touched, so a name like "a.docx.docx" is not rewritten
    # twice.
    stem, extension = os.path.splitext(uploaded_file.name)
    out_name = f'{stem}-bookshine{extension}'
    st.download_button('✅ Korrigierten Umbruch herunterladen', data=decoded_file, file_name=out_name)

    # Show the diff: replace pilcrow markers and highlight insertions.
    diff = diff.replace("¶", " ")
    diff = diff.replace("<ins>", "<ins style='background-color: #c8e6c9; text-decoration: underline;'>")
    st.write(diff, unsafe_allow_html=True)
def process_pdf(uploaded_file):
    """Send an uploaded .pdf file to the Bookshine API and render the report.

    The file content is posted base64-encoded to the ``/process_umbruch``
    endpoint, identified by the MD5 hex digest of the base64 text. On success
    a download button for the annotated PDF is shown and the PDF is embedded
    in the page. On any failure (network error, timeout, HTTP error status,
    malformed response) a generic error message is shown instead of letting
    the exception crash the app.

    Args:
        uploaded_file: File-like object with ``read()`` and a ``name``
            attribute (as returned by ``st.file_uploader``).

    Returns:
        None.
    """
    # The payload carries the file as base64 text inside a JSON body.
    encoded_file = base64.b64encode(uploaded_file.read()).decode('utf-8')
    # The attachment id is the MD5 of the base64 text (not of the raw bytes).
    md5 = hashlib.md5(encoded_file.encode()).hexdigest()
    payload = {
        "attachment_id": md5,
        "attachment_b64": encoded_file,
    }

    try:
        response = requests.post(
            f'{BOOKSHINE_HOST}:{BOOKSHINE_PORT}/process_umbruch',
            json=payload,
            # (connect, read) seconds: fail fast if the server is unreachable,
            # but allow a long processing time before giving up.
            timeout=(10, 600),
        )
        response.raise_for_status()
        result = response.json()
        b64 = result['annotated']
        decoded_file = base64.b64decode(b64)
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # ValueError covers invalid JSON and invalid base64; KeyError and
        # TypeError cover a response that lacks the expected field.
        st.error('Es ist ein Fehler aufgetreten')
        return

    # Offer the annotated report for download. Only the final extension is
    # touched, so a name like "a.pdf.pdf" is not rewritten twice.
    stem, extension = os.path.splitext(uploaded_file.name)
    out_name = f'{stem}-bookshine{extension}'
    st.download_button('✅ Report herunterladen', data=decoded_file, file_name=out_name)

    # Embed the PDF in the page through a base64 data URI.
    pdf_display = f'<iframe src="data:application/pdf;base64,{b64}" width="800" height="800" type="application/pdf"></iframe>'
    st.write(pdf_display, unsafe_allow_html=True)
def main():
    """Render the Bookshine page: access code, file upload and processing.

    The processing button is enabled only while the entered access code is
    accepted by ``password_is_correct`` and no processing run is in progress.
    Depending on the uploaded file's extension the file is handed to
    ``process_docx`` or ``process_pdf``.
    """
    st.title('Bookshine')
    if "in_progress" not in st.session_state:
        st.session_state.in_progress = False

    # NOTE(review): "[email protected]" looks like an e-mail obfuscation
    # placeholder rather than a real address -- confirm the intended contact.
    st.write("Wenn Sie Bookshine testen wollen, aber keinen Zugriffscode haben, schreiben Sie bitte eine E-Mail an [email protected].")
    # Mask the access code so it is not shown in clear text on screen.
    password = st.text_input("Zugriffscode", type="password")
    uploaded_file = st.file_uploader('Manuskript (.docx) oder Umbruch (.pdf) hochladen', type=['docx', 'pdf'], accept_multiple_files=False)
    if not uploaded_file:
        return

    # Compare extensions case-insensitively so "X.PDF" / "X.DOCX" are
    # recognised as well.
    lowered_name = uploaded_file.name.lower()
    is_pdf = lowered_name.endswith('.pdf')
    is_docx = lowered_name.endswith('.docx')
    process_button_name = "✨ Umbruch prüfen" if is_pdf else "✨ Manuskript korrigieren"

    def set_in_progress():
        # Button callback: mark a run as started so the button renders
        # disabled while the file is being processed.
        st.session_state.in_progress = True

    button_disabled = not password_is_correct(password) or st.session_state.in_progress
    try:
        # API request button
        if st.button(process_button_name, on_click=set_in_progress, disabled=button_disabled):
            with st.spinner('Datei wird verarbeitet …'):
                if is_docx:
                    process_docx(uploaded_file)
                elif is_pdf:
                    process_pdf(uploaded_file)
                else:
                    st.warning('Bitte laden Sie eine Datei des Typs .docx oder .pdf hoch.')
    finally:
        # Always clear the flag, even if processing raised; otherwise the
        # button would stay disabled for the rest of the session.
        st.session_state.in_progress = False
# Run the app only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# (end of file)