Spaces:
Running
on
L4
Running
on
L4
mikonvergence
committed on
Upload 4 files
Browse files- helpers/functional.py +85 -0
- helpers/grid.py +256 -0
- helpers/s2l1c_metadata.parquet +3 -0
- helpers/s2l2a_metadata.parquet +3 -0
helpers/functional.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fsspec.parquet import open_parquet_file
|
2 |
+
import fsspec
|
3 |
+
import pyarrow.parquet as pq
|
4 |
+
from .grid import *
|
5 |
+
import pandas as pd
|
6 |
+
from io import BytesIO
|
7 |
+
import os
|
8 |
+
from PIL import Image
|
9 |
+
|
10 |
+
# GLOBAL VARIABLES
|
11 |
+
# Metadata source: use a copy in the current working directory when one
# exists, otherwise fall back to the file hosted on the Hugging Face Hub.
# NOTE: DATASET_NAME is only bound when the remote fallback is taken.
meta_path = 'metadata.parquet'
if not os.path.isfile(meta_path):
    DATASET_NAME = 'Major-TOM/Core-S2L2A'
    meta_path = 'https://huggingface.co/datasets/{}/resolve/main/metadata.parquet'.format(DATASET_NAME)

# Grid with a 10 km target spacing covering every latitude and longitude.
grid = Grid(
    10,
    latitude_range=(-90, 90),
    longitude_range=(-180, 180),
)

# Metadata table, loaded once at import time and shared by the helpers below.
df = pd.read_parquet(meta_path)
|
19 |
+
|
20 |
+
# HELPER FUNCTIONS
|
21 |
+
def gridcell2ints(grid_string):
    """
    Convert a grid cell name such as ``'12U_34R'`` into signed integers.

    The name is split on ``'_'``; the first token is the row and the second
    the column. Each token is a number followed by a one-letter direction.

    Parameters
    ----------
    grid_string : str
        Cell name of the form ``'<n><U|D>_<m><R|L>'``.

    Returns
    -------
    tuple of int
        ``(up, right)``: ``up`` is positive when the row suffix is ``'U'``
        and negative otherwise; ``right`` is positive when the column suffix
        is ``'R'`` and negative otherwise.
    """
    def signed_index(token, positive_suffix):
        # Everything but the last character is the magnitude; the last
        # character decides the sign.
        magnitude = int(token[:-1])
        return magnitude if token[-1] == positive_suffix else -magnitude

    parts = grid_string.split('_')
    up = signed_index(parts[0], 'U')
    right = signed_index(parts[1], 'R')

    return up, right
|
26 |
+
|
27 |
+
def row2image(parquet_url, parquet_row, fullrow_read=True):
    """
    Read the ``thumbnail`` of one row group of a parquet file as a PIL image.

    Parameters
    ----------
    parquet_url : str
        Location of the parquet file (anything ``fsspec`` can open).
    parquet_row : int
        Index of the row group to read.
    fullrow_read : bool, optional
        If True (default) the file is opened as a plain ``fsspec`` file.
        If False it is opened with ``open_parquet_file`` restricted to the
        ``thumbnail`` column.

    Returns
    -------
    PIL.Image.Image
        Image opened from the bytes stored in the first entry of the
        ``thumbnail`` column of the requested row group.
    """
    if fullrow_read:
        # option 1: generic file handle
        source = fsspec.open(parquet_url)
    else:
        # option 2: parquet-aware handle limited to the thumbnail column
        source = open_parquet_file(parquet_url, columns=["thumbnail"])

    # ParquetFile does not close a file object handed to it, so the handle is
    # owned by this `with` block and released as soon as the data is read.
    with source as file_handle:
        with pq.ParquetFile(file_handle) as pf:
            first_row_group = pf.read_row_group(parquet_row, columns=['thumbnail'])

    # The table is already in memory here; the file is no longer needed.
    stream = BytesIO(first_row_group['thumbnail'][0].as_py())
    return Image.open(stream)
|
42 |
+
|
43 |
+
def row2s2(parquet_url, parquet_row, s2_bands = ["B04", "B03", "B02"]):
    """
    Read selected band columns of one row group of a parquet file.

    Parameters
    ----------
    parquet_url : str
        Location of the parquet file.
    parquet_row : int
        Index of the row group to read.
    s2_bands : list of str, optional
        Column names to fetch. The default list is never modified here.

    Returns
    -------
    pyarrow.Table
        The requested columns of the requested row group.
    """
    # Both handles are released when the block exits; the table is returned
    # only after that.
    with open_parquet_file(parquet_url, columns=s2_bands) as remote_file, \
            pq.ParquetFile(remote_file) as parquet:
        band_table = parquet.read_row_group(parquet_row, columns=s2_bands)

    return band_table
|
49 |
+
|
50 |
+
def cell2row(grid_string, meta_df, return_row = False):
    """
    Look up the parquet location of a grid cell in a metadata table.

    Parameters
    ----------
    grid_string : str
        Cell name understood by ``gridcell2ints`` (e.g. ``'12U_34R'``).
    meta_df : pandas.DataFrame
        Table queried on its ``grid_row_u`` and ``grid_col_r`` columns; the
        ``parquet_url`` and ``parquet_row`` columns of the match are returned.
    return_row : bool, optional
        If True, the matching slice of ``meta_df`` is appended to the result.

    Returns
    -------
    tuple or None
        ``(parquet_url, parquet_row)`` or ``(parquet_url, parquet_row, match)``
        when a match exists, otherwise ``None``.
        NOTE(review): ``.item()`` expects exactly one matching row.
    """
    row_U, col_R = gridcell2ints(grid_string)
    matches = meta_df.query('grid_row_u == {} & grid_col_r == {}'.format(row_U, col_R))

    # Guard clause: nothing recorded for this cell.
    if matches.empty:
        return None

    location = (matches.parquet_url.item(), matches.parquet_row.item())
    if return_row:
        return location + (matches,)
    return location
|
61 |
+
|
62 |
+
def map_to_image(map, return_centre=False):
    """
    Fetch the thumbnail of the grid cell under the centre of a map view.

    Parameters
    ----------
    map : object
        Anything exposing ``get_bbox()`` that returns four indexable numbers.
        The mean of entries 1 and 3 is used as latitude and the mean of
        entries 0 and 2 as longitude (presumably west, south, east, north —
        confirm against the map widget in use).
    return_centre : bool, optional
        If True, also return the ``(centre_lat, centre_lon)`` stored in the
        metadata for the matched cell.

    Returns
    -------
    PIL.Image.Image, tuple or None
        The image, or ``(image, (lat, lon))`` when ``return_centre`` is True,
        or ``None`` when the metadata has no entry for the cell.
    """
    # 1. centre of the visible bounds
    bbox = map.get_bbox()
    centre_lat = (bbox[3]+bbox[1])/2
    centre_lon = (bbox[2]+bbox[0])/2

    # 2. coordinate -> grid cell name
    rows, cols = grid.latlon2rowcol([centre_lat], [centre_lon])
    cell_name = "{}_{}".format(rows[0],cols[0])

    # 3. grid cell -> row in parquet
    match = cell2row(cell_name, df, return_row = True)
    if match is None:
        return None

    # 4. fetch the image and the stored cell centre
    parquet_url, parquet_row, meta_row = match
    img = row2image(parquet_url, parquet_row)
    lat, lon = meta_row.centre_lat.item(), meta_row.centre_lon.item()

    if return_centre:
        return img, (lat,lon)
    return img
|
helpers/grid.py
CHANGED
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import math
|
3 |
+
import pandas as pd
|
4 |
+
import geopandas as gpd
|
5 |
+
from shapely.geometry import LineString, Polygon
|
6 |
+
from tqdm import tqdm
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
class Grid():
    """
    Grid of points laid out at a roughly constant distance on the globe.

    Rows are circles of constant latitude whose spacing along a meridian is
    at most ``dist`` km. Every row is split into columns whose spacing along
    that circle of latitude is at most ``dist`` km. Each grid point is the
    bottom-left corner of its cell.

    Rows are named ``'0U', '1U', ...`` from latitude 0 northwards and
    ``'1D', '2D', ...`` southwards; columns are named ``'0R', '1R', ...``
    from longitude 0 eastwards and ``'1L', '2L', ...`` westwards. A point
    name is ``'<row>_<col>'``.

    Parameters
    ----------
    dist : float
        Target spacing in km.
    latitude_range : tuple, optional
        Inclusive ``(min, max)`` latitude in degrees of the rows to keep.
    longitude_range : tuple, optional
        Inclusive ``(min, max)`` longitude in degrees of the columns to keep.
    utm_definition : {'bottomleft', 'center'}, optional
        Which position of a cell is used to pick its UTM zone.

    Attributes
    ----------
    rows, lats : numpy.ndarray
        Row names and their latitudes, ordered south to north.
    points : geopandas.GeoDataFrame
        All grid points (concatenation of ``points_by_row``; the index is
        not reset, so index values repeat across rows).
    points_by_row : list of geopandas.GeoDataFrame
        One frame per entry of ``rows``, ordered west to east.
    """

    RADIUS_EQUATOR = 6378.137 # km

    def __init__(self,dist,latitude_range=(-85,85),longitude_range=(-180,180),utm_definition='bottomleft'):
        self.dist = dist
        self.latitude_range = latitude_range
        self.longitude_range = longitude_range
        self.utm_definition = utm_definition
        self.rows,self.lats = self.get_rows()
        self.points, self.points_by_row = self.get_points()

    def get_rows(self):
        """
        Build the row names and latitudes of the grid.

        Returns
        -------
        rows : numpy.ndarray of str
            Row names, south to north, restricted to ``latitude_range``.
        latitudes : numpy.ndarray of float
            Latitude in degrees of each row.
        """
        # Define set of latitudes to use, based on the grid distance
        arc_pole_to_pole = math.pi * self.RADIUS_EQUATOR
        num_divisions_in_hemisphere = math.ceil(arc_pole_to_pole / self.dist)

        # Shifting by half the range maps the first sample (-90) onto exactly 0,
        # so the equator is always one of the rows.
        latitudes = np.linspace(-90, 90, num_divisions_in_hemisphere+1)[:-1]
        latitudes = np.mod(latitudes, 180) - 90

        # order should be from south to north
        latitudes = np.sort(latitudes)

        zeroth_row = np.searchsorted(latitudes,0)

        # From 0U-NU and 1D-ND
        num_rows = len(latitudes)
        rows = [None] * num_rows
        rows[zeroth_row:] = [f'{i}U' for i in range(num_rows-zeroth_row)]
        rows[:zeroth_row] = [f'{abs(i-zeroth_row)}D' for i in range(zeroth_row)]

        # bound to range
        in_range = (latitudes>=self.latitude_range[0]) & (latitudes<=self.latitude_range[1])
        rows,latitudes = np.array(rows), np.array(latitudes)

        return rows[in_range],latitudes[in_range]

    def get_circumference_at_latitude(self,lat):
        """
        Circumference in km of the circle of latitude ``lat`` (degrees) on a
        sphere of radius ``RADIUS_EQUATOR``.
        """
        radius_at_lat = self.RADIUS_EQUATOR * math.cos(lat * math.pi / 180)
        circumference = 2 * math.pi * radius_at_lat

        return circumference

    def subdivide_circumference(self,lat,return_cols=False):
        """
        Longitudes that split the circle of latitude ``lat`` into equal parts
        no longer than ``dist``.

        Parameters
        ----------
        lat : float
            Latitude in degrees.
        return_cols : bool, optional
            If True, also return the column name of every longitude.

        Returns
        -------
        numpy.ndarray or (numpy.ndarray, numpy.ndarray)
            Sorted longitudes in degrees, preceded by the column names when
            ``return_cols`` is True.
        """
        circumference = self.get_circumference_at_latitude(lat)
        num_divisions = math.ceil(circumference / self.dist)

        # Shifting by half the range maps the first sample (-180) onto exactly
        # 0, so longitude 0 is always one of the columns.
        longitudes = np.linspace(-180,180, num_divisions+1)[:-1]
        longitudes = np.mod(longitudes, 360) - 180
        longitudes = np.sort(longitudes)

        if return_cols:
            num_cols = len(longitudes)
            cols = [None] * num_cols
            zeroth_idx = np.where(longitudes==0)[0][0]
            cols[zeroth_idx:] = [f'{i}R' for i in range(num_cols-zeroth_idx)]
            cols[:zeroth_idx] = [f'{abs(i-zeroth_idx)}L' for i in range(zeroth_idx)]
            return np.array(cols),np.array(longitudes)

        return np.array(longitudes)

    def get_points(self):
        """
        Build the grid points of every row.

        Returns
        -------
        points : geopandas.GeoDataFrame
            All points, with columns ``name``, ``row``, ``col``, ``row_idx``,
            ``col_idx``, ``utm_zone``, ``epsg`` and ``geometry``.
        points_by_row : list of geopandas.GeoDataFrame
            The same points split per row; ``row_idx`` / ``col_idx`` are the
            positions within this list and within the row's frame.

        Raises
        ------
        ValueError
            If ``utm_definition`` is neither ``'bottomleft'`` nor ``'center'``.
        """
        points_by_row = [None]*len(self.rows)
        for r_idx,(r,lat) in enumerate(zip(self.rows,self.lats)):
            point_names,grid_row_names,grid_col_names = [],[],[]
            grid_row_idx,grid_col_idx = [],[]
            grid_lats,grid_lons = [],[]
            utm_zones,epsgs = [],[]

            cols,lons = self.subdivide_circumference(lat,return_cols=True)
            cols,lons = self.filter_longitude(cols,lons)

            for c_idx,(c,lon) in enumerate(zip(cols,lons)):
                point_names.append(f'{r}_{c}')
                grid_row_names.append(r)
                grid_col_names.append(c)
                grid_row_idx.append(r_idx)
                grid_col_idx.append(c_idx)
                grid_lats.append(lat)
                grid_lons.append(lon)
                if self.utm_definition == 'bottomleft':
                    utm_zones.append(get_utm_zone_from_latlng([lat,lon]))
                elif self.utm_definition == 'center':
                    # Half a cell in metres, converted to degrees
                    # (111_120 m per degree of latitude).
                    center_lat = lat + (1000*self.dist/2)/111_120
                    center_lon = lon + (1000*self.dist/2)/(111_120*math.cos(center_lat*math.pi/180))
                    utm_zones.append(get_utm_zone_from_latlng([center_lat,center_lon]))
                else:
                    raise ValueError(f'Invalid utm_definition {self.utm_definition}')
                epsgs.append(f'EPSG:{utm_zones[-1]}')

            points_by_row[r_idx] = gpd.GeoDataFrame({
                'name':point_names,
                'row':grid_row_names,
                'col':grid_col_names,
                'row_idx':grid_row_idx,
                'col_idx':grid_col_idx,
                'utm_zone':utm_zones,
                'epsg':epsgs
            },geometry=gpd.points_from_xy(grid_lons,grid_lats))

        points = gpd.GeoDataFrame(pd.concat(points_by_row))
        # points.reset_index(inplace=True,drop=True)
        return points, points_by_row

    def group_points_by_row(self):
        """Return one frame per row name, sliced from ``self.points``."""
        return [self.points[self.points.row==row] for row in self.rows]

    def filter_longitude(self,cols,lons):
        """Keep the columns whose longitude lies inside ``longitude_range`` (inclusive)."""
        in_range = (lons>=self.longitude_range[0]) & (lons<=self.longitude_range[1])
        return cols[in_range],lons[in_range]

    def latlon2rowcol(self,lats,lons,return_idx=False):
        """
        Convert latitude and longitude to row and column name from the grid.

        A position belongs to the cell whose bottom-left corner is the nearest
        grid point at or below / at or left of it, so a position lying exactly
        on a grid point maps to that point's own cell.

        Parameters
        ----------
        lats, lons : sequence of float
            Positions in degrees; must have the same length.
        return_idx : bool, optional
            If True, also return the index label in ``self.points`` of every
            matched point.

        Returns
        -------
        rows : numpy.ndarray of str
        cols : list of str
        idx : list, only when ``return_idx`` is True

        Notes
        -----
        A position below the first grid latitude, or left of the first
        longitude of its row, yields index -1 and therefore wraps around to
        the last row / column.
        """
        # side='right' counts grid lines <= the query, so subtracting one gives
        # the bottom-left corner even when the query sits exactly on a line.
        row_idxs = np.searchsorted(self.lats,lats,side='right')-1

        # Get the possible points of the grid cells at the given latitude
        possible_points = [self.points_by_row[row_idx] for row_idx in row_idxs]

        # For each point, find the rightmost point that is not to the right of the given longitude
        cols = [
            poss_points.iloc[np.searchsorted(poss_points.geometry.x,lon,side='right')-1].col
            for poss_points,lon in zip(possible_points,lons)
        ]
        rows = self.rows[row_idxs]

        if return_idx:
            # Get the table index for self.points with each row,col pair in rows, cols
            idx = [self.points[(self.points.row==row) & (self.points.col==col)].index.values[0] for row,col in zip(rows,cols)]
            return rows,cols,idx
        return rows,cols

    def rowcol2latlon(self,rows,cols):
        """
        Return ``(lats, lons)`` lists with the coordinates of the grid points
        named by each ``(row, col)`` pair.
        """
        point_geoms = [self.points.loc[(self.points.row==row) & (self.points.col==col),'geometry'].values[0] for row,col in zip(rows,cols)]
        lats = [point.y for point in point_geoms]
        lons = [point.x for point in point_geoms]
        return lats,lons

    def get_bounded_footprint(self,point,buffer_ratio=0):
        """
        Polygon footprint of the grid cell of ``point``, bounded by the
        neighbouring grid points.

        Parameters
        ----------
        point : row of ``self.points``
            Must expose ``geometry``, ``row_idx`` and ``col_idx``. The point is
            the bottom-left corner of the polygon.
        buffer_ratio : float, optional
            Fraction of the cell width / height added on every side.

        Returns
        -------
        shapely.geometry.Polygon
        """
        bottom,left = point.geometry.y,point.geometry.x
        row_idx = point.row_idx
        col_idx = point.col_idx
        next_row_idx = row_idx+1
        next_col_idx = col_idx+1

        if next_row_idx >= len(self.lats): # If at top row, use difference between top and second-to-top row for height
            height = (self.lats[row_idx] - self.lats[row_idx-1])
            top = self.lats[row_idx] + height
        else:
            top = self.lats[next_row_idx]

        # points_by_row is a list, so it has to be addressed by the integer
        # row position, not by the row name.
        row_points = self.points_by_row[row_idx]
        max_col = len(row_points.col_idx)-1
        if next_col_idx > max_col: # If at rightmost column, use difference between rightmost and second-to-rightmost column for width
            width = (row_points.iloc[col_idx].geometry.x - row_points.iloc[col_idx-1].geometry.x)
            right = row_points.iloc[col_idx].geometry.x + width
        else:
            right = row_points.iloc[next_col_idx].geometry.x

        # Buffer the polygon by the ratio of the grid cell's width/height
        width = right - left
        height = top - bottom

        buffer_horizontal = width * buffer_ratio
        buffer_vertical = height * buffer_ratio

        new_left = left - buffer_horizontal
        new_right = right + buffer_horizontal

        new_bottom = bottom - buffer_vertical
        new_top = top + buffer_vertical

        bbox = Polygon([(new_left,new_bottom),(new_left,new_top),(new_right,new_top),(new_right,new_bottom)])

        return bbox
|
198 |
+
|
199 |
+
|
200 |
+
def get_utm_zone_from_latlng(latlng):
    """
    Get the EPSG code of the WGS 84 / UTM zone for a latlng position.

    Parameters
    ----------
    latlng : List[Union[int, float]]
        ``[latitude, longitude]`` in degrees. A tuple or numpy array is
        accepted as well.

    Returns
    -------
    str
        Five-digit EPSG code without the ``EPSG:`` prefix: ``326`` followed
        by the two-digit zone for latitudes >= 0, ``327`` followed by the
        two-digit zone for latitudes < 0 (e.g. ``'32601'``, ``'32733'``).

    Notes
    -----
    Zones are plain 6-degree bands; the irregular zones around Norway and
    Svalbard are not special-cased.
    """
    assert isinstance(latlng, (list, tuple, np.ndarray)), "latlng must be in the form of a list."

    lat, lng = latlng[0], latlng[1]

    # Wrap the longitude into [0, 360) first so that 180 degrees (and anything
    # beyond, e.g. a cell centre pushed past the antimeridian) lands in zones
    # 1..60 instead of a non-existent zone 61.
    zone = math.floor(((lng + 180) % 360) / 6) + 1
    hemisphere_digit = '7' if lat < 0 else '6'

    # The zone must be zero-padded: zone 1 north is EPSG 32601, not 3261.
    epsg = f"32{hemisphere_digit}{zone:02d}"

    return epsg
|
227 |
+
|
228 |
+
|
229 |
+
if __name__ == '__main__':
    # Manual check: map random positions to their grid cell and export both
    # the grid and the position -> grid point segments as GeoJSON.
    import matplotlib.pyplot as plt
    from pprint import pprint

    dist = 100
    grid = Grid(dist,latitude_range=(10,70),longitude_range=(-30,60))

    # Random positions strictly inside the grid's extent.
    test_lons = np.random.uniform(-20,50,size=(1000))
    test_lats = np.random.uniform(12,68,size=(1000))

    # Position -> cell name -> bottom-left corner of that cell.
    test_rows,test_cols = grid.latlon2rowcol(test_lats,test_lons)
    test_lats2,test_lons2 = grid.rowcol2latlon(test_rows,test_cols)

    for sample in (test_lons,test_lats,test_rows,test_cols):
        print(sample[:10])

    # Make line segments from the points to their corresponding grid points
    segments = [
        LineString([(lon,lat),(lon2,lat2)])
        for lon,lat,lon2,lat2 in zip(test_lons,test_lats,test_lons2,test_lats2)
    ]
    lines = gpd.GeoDataFrame(geometry=gpd.GeoSeries(segments))

    lines.to_file(f'testlines_{dist}km.geojson',driver='GeoJSON')
    grid.points.to_file(f'testgrid_{dist}km.geojson',driver='GeoJSON')
|
helpers/s2l1c_metadata.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e19d451d89510923c4bc3d8acf8c45985903a73c89539e431292d6226a4b5ddc
|
3 |
+
size 171721623
|
helpers/s2l2a_metadata.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a96c80bc43cb841b8400b05e80f4b477453b51a1e6833821333e3c11831e78b
|
3 |
+
size 173048695
|