In [8]:
import xml.etree.ElementTree as ET
import requests
import csv

In [9]:
xml_url = "https://wayback.maptiles.arcgis.com/arcgis/rest/services/world_imagery/mapserver/wmts/1.0.0/wmtscapabilities.xml"

In [10]:
# parse the xml
xml = requests.get(xml_url).text
root = ET.fromstring(xml)

In [12]:
# Namespace definitions
namespaces = {
    'ows': 'https://www.opengis.net/ows/1.1',
    '': 'https://www.opengis.net/wmts/1.0'  # Default namespace
}

# Open CSV file for writing
with open('wayback.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)

    # Write header row
    writer.writerow([
        'Title', 
        'Identifier', 
        'LowerCorner', 
        'UpperCorner', 
        'Format', 
        'TileMatrixSetLinks', 
        'ResourceURL_Template'
    ])

    # Extract and write data
    for layer in root.findall('.//Layer', namespaces):
        title = layer.find('ows:Title', namespaces).text
        identifier = layer.find('ows:Identifier', namespaces).text

        bounding_box = layer.find('ows:BoundingBox/ows:LowerCorner', namespaces)
        lower_corner = bounding_box.text if bounding_box is not None else ''

        bounding_box = layer.find('ows:BoundingBox/ows:UpperCorner', namespaces)
        upper_corner = bounding_box.text if bounding_box is not None else ''

        fmt = layer.find('Format', namespaces).text if layer.find('Format', namespaces) else ''

        tile_matrix_links = [tms.text for tms in layer.findall('TileMatrixSetLink/TileMatrixSet', namespaces)]
        tile_matrix_set_links = ', '.join(tile_matrix_links)

        resource_url = layer.find('ResourceURL', namespaces)
        resource_url_template = resource_url.get('template') if resource_url is not None else ''

        writer.writerow([
            title,
            identifier,
            lower_corner,
            upper_corner,
            fmt,
            tile_matrix_set_links,
            resource_url_template
        ])

# Parse dates

In [1]:
import pandas as pd

df = pd.read_csv('wayback.csv')
df.head(2)

Unnamed: 0,Title,Identifier,LowerCorner,UpperCorner,Format,TileMatrixSetLinks,ResourceURL_Template
0,World Imagery (Wayback 2024-10-10),WB_2024_R11,-2.003750722959434E7 -2.003750722959434E7,2.003750722959434E7 2.003750722959434E7,,"default028mm, GoogleMapsCompatible",https://wayback.maptiles.arcgis.com/arcgis/res...
1,World Imagery (Wayback 2024-09-19),WB_2024_R10,-2.003750722959434E7 -2.003750722959434E7,2.003750722959434E7 2.003750722959434E7,,"default028mm, GoogleMapsCompatible",https://wayback.maptiles.arcgis.com/arcgis/res...


In [3]:
df['date'] = df['Title'].str.extract(r'(\d{4}-\d{2}-\d{2})')
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
df.head(2)

Unnamed: 0,Title,Identifier,LowerCorner,UpperCorner,Format,TileMatrixSetLinks,ResourceURL_Template,date
0,World Imagery (Wayback 2024-10-10),WB_2024_R11,-2.003750722959434E7 -2.003750722959434E7,2.003750722959434E7 2.003750722959434E7,,"default028mm, GoogleMapsCompatible",https://wayback.maptiles.arcgis.com/arcgis/res...,2024-10-10
1,World Imagery (Wayback 2024-09-19),WB_2024_R10,-2.003750722959434E7 -2.003750722959434E7,2.003750722959434E7 2.003750722959434E7,,"default028mm, GoogleMapsCompatible",https://wayback.maptiles.arcgis.com/arcgis/res...,2024-09-19


In [4]:
df.to_parquet('wayback.parquet', index=False)