# Geodata-Harvester App
# Description: Streamlit app for the Geodata-Harvester package
# Author: Sebastian Haan

import streamlit as st
import sys
import os
import pandas as pd
import numpy as np
import yaml
import shutil
from types import SimpleNamespace
from PIL import Image

# Import the core package Geodata-Harvester
from geodata_harvester import harvest

## Limitations:
# The Google Earth Engine extension for Geodata-Harvester is not natively supported in Streamlit,
# unless you run the app locally or provide GEE service account token.

FNAME_ZIP = 'harvest_collection.zip'
OUTPATH = 'harvest_collection'

# Temporary copy of the uploaded locations file, written so the harvester can read it from disk
FNAME_TEMP_DATA = 'temp_data.csv'

# Link to settings templates
settings_template_link = 'https://huggingface.co/spaces/SIH/geodata-harvester-app/tree/main/data/templates_settings'

# link to data template
data_template_link = 'https://huggingface.co/spaces/SIH/geodata-harvester-app/tree/main/data'

# link to github page
link_to_githubpage = 'https://sydney-informatics-hub.github.io/geodata-harvester/'

# link to settings overview
link_to_settings_overview = 'https://github.com/Sydney-Informatics-Hub/geodata-harvester/blob/main/quarto/docs/Settings_Overview.md'

# link to data overview
link_to_data_overview = 'https://github.com/Sydney-Informatics-Hub/geodata-harvester/blob/main/quarto/docs/Data_Overview.md'

menu_dict = {
    "About": "https://sydney-informatics-hub.github.io/geodata-harvester/",
    "Get help": "https://github.com/Sydney-Informatics-Hub/geodata-harvester/"}

# Example settings file shown in the "Settings Overview" section.
# Kept at module level (column 0) so that the YAML indentation is exactly what the user sees.
SETTINGS_EXAMPLE_YAML = """\
#Bounding Box as (lng_min, lat_min, lng_max, lat_max):
#If not provided, the Geodata-Harvester will try to infer bbox from points in Locations file
target_bbox: [149, -30, 149.9, -29]

#Select start date:
date_min: 2023-01-01

#Select end date:
date_max: 2023-02-01

#Spatial Resolution [in arcsec]:
target_res: 100.0

# Number of time interval slices in given date range
temp_intervals: 1

#Headername of Latitude in input file (only relevant if data file provided):
colname_lat: Lat

#Headername of Longitude in input file (only relevant if data file provided):
colname_lng: Long

target_sources:
  #Satellite data from Digital Earth Australia (optional)
  DEA:
  - landsat_barest_earth

  #National Digital Elevation Model (DEM) 1 Second (optional)
  DEM:
  - DEM
  - Slope
  - Aspect

  #Landscape Data (optional)
  Landscape:
  - Relief_300m

  #Radiometric Data (optional)
  Radiometric:
  - radmap2019_grid_dose_terr_awags_rad_2019
  - radmap2019_grid_dose_terr_filtered_awags_rad_2019

  # SILO Climate Data (optional)
  # temporal aggregation options: 'mean', 'median', 'sum', 'std', 'perc95', 'perc5', 'max', 'min'
  SILO:
    max_temp:
    - Median
    min_temp:
    - Median

  #Soil data from SLGA (optional)
  SLGA:
    Bulk_Density:
    - 0-5cm
    Clay:
    - 0-5cm
"""

# Markdown for the "Data Overview" section (column 0 so list items are not rendered as code).
DATA_OVERVIEW_MD = f"""
The following data sources are currently supported by the Geodata-Harvester Streamlit app:

- Soil Data 3D SLGA (Australia)
- SILO Climate Database (Australia)
- National Digital Elevation Model incl. Slope, Aspect (Australia)
- Digital Earth Australia Geoscience Earth Observations (incl. Sentinel, Landsat, MODIS for Australia)
- Radiometric Data (Australia)
- Landscape Data (Australia)

For more information, please visit the [Geodata-Harvester Data Overview]({link_to_data_overview}).

Note that Google Earth Engine is not supported in streamlit app unless you have a GEE token or run the app locally.
"""


def init_header():
    """Render the page title, tagline, logo and introductory paragraph."""
    st.title('Geodata-Harvester App')
    st.subheader("Jumpstart your geospatial analysis.")
    image = Image.open('assets/dataharvester_streamlit_logo_blue.jpg')
    st.image(image)
    st.write(
        "The Geodata-Harvester automates geodata download and spatio-temporal processing "
        "from a large range of datasources into ready-made datasets. "
        "This application is a lightweight Streamlit wrapper crafted to work seamlessly "
        "with the Geodata-Harvester package. "
        "For full accessibility to a comprehensive set of features and options, "
        f"please visit the [Geodata-Harvester project]({link_to_githubpage})."
    )


def init_howto():
    """Render the numbered "How to" instructions."""
    st.subheader('How to')
    st.write(f'1) Prepare a settings file that lists all requested layers and spatio-temporal settings (see [templates]({settings_template_link})). For more info please see section below.')
    st.write('2) Upload the settings file in the sidebar.')
    st.write('3) Optional: Select csv file including Latitude and Longitude points. This will enable the Geodata-Harvester to automatically populate the table with extracted data for all given locations.')
    st.write('4) Then click `Run Geodata-Harvester` on the top of the sidebar.')
    st.write('5) Download and processing of all data might take a couple of minutes. Once completed, you will be able to download all generated results as zip.')


def print_attr():
    """Render the attribution and acknowledgement text."""
    st.subheader('Attributions and Acknowledgements')
    st.write('This software was developed by the Sydney Informatics Hub, a core research facility of the University of Sydney.')
    st.write('Acknowledgments are an important way for us to demonstrate the value we bring to your research. Your research outcomes are vital for ongoing funding of the Sydney Informatics Hub.')
    st.write('If you make use of this software for your research project, please include the following acknowledgment:')
    st.write('“This research was supported by the Sydney Informatics Hub, a Core Research Facility of the University of Sydney."')


def _rewind(stream):
    """Move a file-like object back to its start so it can be read again."""
    if hasattr(stream, 'seek'):
        stream.seek(0)


def load_settings(settings_file):
    """Parse an uploaded YAML settings file into a namespace.

    Args:
        settings_file: File-like object (or YAML text) holding the settings,
            or None when nothing was uploaded.

    Returns:
        A SimpleNamespace with one attribute per top-level YAML key, with
        ``date_min``/``date_max`` converted to strings, ``outpath`` set to
        OUTPATH and ``infile`` set to the uploaded locations file if one is
        present in the session state. Returns None if ``settings_file`` is None.

    Raises:
        ValueError: If the file does not contain a YAML mapping.
    """
    if settings_file is None:
        return None

    # The same upload may be parsed more than once (show settings, then run),
    # so always start reading from the beginning.
    _rewind(settings_file)
    # NOTE(security): the settings file is user-supplied. FullLoader avoids arbitrary
    # object construction, but yaml.safe_load would be the stricter choice if the
    # settings never need non-standard tags - review before switching.
    parsed = yaml.load(settings_file, Loader=yaml.FullLoader)
    if not isinstance(parsed, dict):
        raise ValueError('Settings file must contain a YAML mapping of settings.')

    # Parse settings dictionary as namespace
    settings = SimpleNamespace(**parsed)
    # YAML parses unquoted dates into date objects; store them as strings
    settings.date_min = str(settings.date_min)
    settings.date_max = str(settings.date_max)
    if st.session_state.get('data_file') is not None:
        settings.infile = st.session_state['data_file']
    settings.outpath = OUTPATH
    return settings


def print_settings(settings):
    """Write every attribute of the settings namespace to the page.

    ``target_sources`` is expanded into one line per source; nothing is
    rendered when ``settings`` is empty or None.
    """
    if not settings:
        return
    st.header("Settings loaded:")
    for key, value in vars(settings).items():
        if key == "target_sources":
            st.write(f"settings.{key}:")
            for source in settings.target_sources:
                st.write(f" '{source}': {settings.target_sources[source]}")
        else:
            st.write(f"settings.{key} : {value}")


def print_info():
    """Render the settings-file overview (with example) and the list of data sources."""
    st.subheader('Settings Overview')
    st.write(
        'The Geodata-Harvester is controlled by a settings file in YAML format, '
        'which includes all user-defined settings and data layers for extraction and processing.'
    )
    st.write(f'Example settings file can be found in the [templates]({settings_template_link}).')
    st.write('The settings file includes the following sections:')
    # Shown as a code block: in plain markdown the YAML comments would render as
    # headings and the indentation of nested keys would be lost.
    st.code(SETTINGS_EXAMPLE_YAML, language='yaml')

    st.subheader('Data Overview')
    st.markdown(DATA_OVERVIEW_MD)


def load_data(data_file):
    """Load a CSV file into a pandas DataFrame.

    Args:
        data_file: Path or file-like object of the CSV, or None.

    Returns:
        The DataFrame, or None when ``data_file`` is None.
    """
    if data_file is None:
        return None
    # The upload may already have been read once (e.g. "Show Input Data")
    _rewind(data_file)
    return pd.read_csv(data_file)


def show_results():
    """Display the results table stored at ``st.session_state['fname_results']``."""
    df_results = pd.read_csv(st.session_state['fname_results'])
    st.write('Extracted Data Table:')
    st.write(df_results)


def zip_folder(folder_path):
    """Zip the contents of ``folder_path`` into FNAME_ZIP in the working directory."""
    # make_archive appends the ".zip" extension itself
    archive_base = os.path.splitext(FNAME_ZIP)[0]
    shutil.make_archive(archive_base, "zip", folder_path)


def gee_init():
    """Add the optional Google Earth Engine token field to the sidebar (experimental).

    A non-empty token is exported as the EARTHENGINE_TOKEN environment variable.
    An empty field leaves the environment untouched so that a token configured
    outside the app is not wiped on every rerun.
    """
    st.sidebar.header('Google Earth Engine (Optional)')
    st.sidebar.write('Please copy your Google Earth Engine authentication token in the field below.')
    token = st.sidebar.text_input(
        label="Enter Google Earth Engine Token:",
        type="password"
    ).strip()
    if token:
        os.environ['EARTHENGINE_TOKEN'] = token


def open_sidebar():
    """Build the sidebar and store the uploaded files in the session state.

    Returns:
        Tuple ``(button_show_settings, button_show_data, button_run)`` with
        the values returned by the three sidebar buttons.
    """
    # Add run
    st.sidebar.header('Run Geodata-Harvester')
    st.sidebar.write('This will harvest geodata as specified in the settings file below.')
    button_run = st.sidebar.button('Run Geodata-Harvester')
    st.sidebar.divider()

    # Add yaml file settings file upload
    st.sidebar.header('Settings File')
    settings_file = st.sidebar.file_uploader(f"Choose a yaml settings file (see [templates]({settings_template_link}))")
    button_show_settings = st.sidebar.button('Show Settings')
    st.session_state['settings_file'] = settings_file
    st.sidebar.divider()

    # Add data file uploader
    st.sidebar.header('Locations File')
    data_file = st.sidebar.file_uploader(f"Upload a CSV file with Latitudes and Longitudes (see [example data csv]({data_template_link}))")
    button_show_data = st.sidebar.button('Show Input Data')
    st.session_state['data_file'] = data_file
    st.sidebar.divider()

    # optional
    gee_init()
    return button_show_settings, button_show_data, button_run


def run_harvester():
    """Run the Geodata-Harvester with the settings file held in the session state.

    The parsed settings are written to ``OUTPATH/settings.yaml`` and that file
    is handed to ``harvest.run``. If ``st.session_state['data_file2']`` is set,
    it is used as the locations input file.

    Returns:
        Whatever ``harvest.run(..., return_df=True)`` returns, or None (after
        showing an error) when no settings file has been uploaded.
    """
    settings_file = st.session_state.get('settings_file')
    if settings_file is None:
        st.error('Please upload settings file.')
        return None

    settings = load_settings(settings_file)
    locations_path = st.session_state.get('data_file2')
    if locations_path is not None:
        settings.infile = locations_path
    elif not isinstance(getattr(settings, 'infile', None), (str, type(None))):
        # load_settings may have attached the raw upload object; that cannot be
        # written to YAML as a usable input path, so leave it out.
        del settings.infile

    os.makedirs(name=OUTPATH, exist_ok=True)
    # write settings to disk
    fname_settings = os.path.join(OUTPATH, 'settings.yaml')
    with open(fname_settings, 'w') as file:
        yaml.dump(settings.__dict__, file)

    with st.spinner('Running Geodata-Harvester...'):
        df = harvest.run(fname_settings, return_df=True)
    st.success('Harvest complete!', icon="✅")
    return df


def _run_and_package():
    """Stage the locations file, run the harvest and zip the output folder."""
    if st.session_state.get('settings_file') is None:
        st.text('No settings file provided. Please add settings file.')
        return

    try:
        data_file = st.session_state.get('data_file')
        if data_file is not None:
            # save data so it can be read by geopandas later
            load_data(data_file).to_csv(FNAME_TEMP_DATA)
            st.session_state['data_file2'] = FNAME_TEMP_DATA
        else:
            st.session_state['data_file2'] = None

        run_harvester()

        # zip result folder in python and make available for download
        with st.spinner(f'Generating {FNAME_ZIP} file for download...'):
            zip_folder(OUTPATH)
        st.session_state['fname_results_zip'] = FNAME_ZIP
        st.success('Zip file generated!', icon="✅")
        st.session_state['fname_results'] = os.path.join(OUTPATH, 'results.csv')
    finally:
        # delete temp data file, also when the harvest failed
        if os.path.exists(FNAME_TEMP_DATA):
            os.remove(FNAME_TEMP_DATA)


def _show_downloads():
    """Offer the zipped results and, if present, the results table."""
    if 'fname_results_zip' not in st.session_state:
        return
    if not os.path.exists(FNAME_ZIP):
        # Session state can outlive the file on disk
        return
    st.write('Harvester Results:')
    with open(FNAME_ZIP, 'rb') as f:
        st.download_button('Download Results (zip)', f, file_name=FNAME_ZIP)
    if os.path.exists(st.session_state['fname_results']):
        if st.button('Show Result Table'):
            show_results()


########## Main ###########
def main():
    """Entry point: lay out the page, react to the sidebar buttons, show downloads."""
    # Must be the first Streamlit call of the script run
    st.set_page_config(page_title="Geodata-Harvester App", layout="centered", menu_items=menu_dict)

    init_header()
    with st.expander("How to use this app"):
        init_howto()
    with st.expander("Settings and Data Overview"):
        print_info()
    with st.expander("Attributions and Acknowledgements"):
        print_attr()

    workpanel = st.empty()

    button_show_settings, button_show_data, button_run = open_sidebar()

    if button_show_settings:
        settings = load_settings(st.session_state['settings_file'])
        if settings is None:
            st.text('No settings file provided. Please add settings file.')
        print_settings(settings)

    if button_show_data:
        df = load_data(st.session_state['data_file'])
        st.header("Data Table:")
        st.write(df)

    if button_run:
        _run_and_package()

    # Show options and buttons after run
    _show_downloads()


if __name__ == "__main__":
    main()

#### Debugging options
#st.write('Streamlit version:', st.__version__)
#st.write('Session state:', st.session_state)
#for key in st.session_state:
#    st.write(f"{key}: {st.session_state[key]}")