Query Sentinel-2 data via stackstac into xarray.DataArray
Data will be written to a temporary directory to illustrate how to remove attributes, which is necessary if you want to write the object as a zarr file to disk.
from pathlib import Path
import sys
import tempfile
import numpy as np
import pystac_client
import stackstac
from bounding_box import *
print (sys.version)
print (pystac_client.__version__)
print (stackstac.__version__)
3.13.1 | packaged by conda-forge | (main, Dec 5 2024, 21:23:54) [GCC 13.3.0]
0.8.6
0.5.1
tmp_dir = tempfile.TemporaryDirectory()
out_dir = Path(tmp_dir.name)
catalog = pystac_client.Client.open ("https://earth-search.aws.element84.com/v1/" )
query = catalog.search(
collections= ["sentinel-2-l2a" ],
bbox= [lon_min, lat_min, lon_max, lat_max],
datetime= "2024-07-01" ,
)
items = list (query.items())
print (f"Found: { len (items):d} datasets" )
ds = stackstac.stack(
items,
assets= ["red" , "green" , "blue" , "nir08" ],
resolution= 10 ,
xy_coords= "center" ,
epsg= 32632
)
ds = ds.sel(x= slice (x_min, x_max), y= slice (y_max, y_min))
ds = ds.isel(time= [0 ]) # just take first timestep to reduce amount of data
<xarray.DataArray 'stackstac-7f303e927bf4afe6baade82cf0bd2b4e' (time: 1,
band: 4,
y: 6675, x: 5329)> Size: 1GB
dask.array<getitem, shape=(1, 4, 6675, 5329), dtype=float64, chunksize=(1, 1, 1024, 1024), chunktype=numpy.ndarray>
Coordinates: (12/52)
* time (time) datetime64[ns] 8B 2024-07...
id (time) <U24 96B 'S2B_32TPT_20240...
* band (band) <U5 80B 'red' ... 'nir08'
* x (x) float64 43kB 6.217e+05 ... 6...
* y (y) float64 53kB 5.237e+06 ... 5...
instruments <U3 12B 'msi'
... ...
raster:bands (band) object 32B {'nodata': 0, ...
title (band) <U21 336B 'Red (band 4) -...
common_name (band) <U5 80B 'red' ... 'nir08'
center_wavelength (band) float64 32B 0.665 ... 0.865
full_width_half_max (band) float64 32B 0.038 ... 0.033
epsg int64 8B 32632
Attributes:
spec: RasterSpec(epsg=32632, bounds=(598150, 5087460, 713750, 5302...
crs: epsg:32632
transform: | 10.00, 0.00, 598150.00|\n| 0.00,-10.00, 5302570.00|\n| 0.0...
resolution: 10 dask.array<chunksize=(1, 1, 639, 716), meta=np.ndarray>
Bytes
1.06 GiB
8.00 MiB
Shape
(1, 4, 6675, 5329)
(1, 1, 1024, 1024)
Dask graph
168 chunks in 5 graph layers
Data type
float64 numpy.ndarray
1 1 5329 6675 4
Coordinates: (52)
time
(time)
datetime64[ns]
2024-07-01T10:17:48.675000
array(['2024-07-01T10:17:48.675000000'], dtype='datetime64[ns]') id
(time)
<U24
'S2B_32TPT_20240701_0_L2A'
array(['S2B_32TPT_20240701_0_L2A'], dtype='<U24') band
(band)
<U5
'red' 'green' 'blue' 'nir08'
array(['red', 'green', 'blue', 'nir08'], dtype='<U5') x
(x)
float64
6.217e+05 6.217e+05 ... 6.75e+05
array([621715., 621725., 621735., ..., 674975., 674985., 674995.],
shape=(5329,)) y
(y)
float64
5.237e+06 5.237e+06 ... 5.171e+06
array([5237275., 5237265., 5237255., ..., 5170555., 5170545., 5170535.],
shape=(6675,)) instruments
()
<U3
'msi'
array('msi', dtype='<U3') s2:datatake_type
()
<U8
'INS-NOBS'
array('INS-NOBS', dtype='<U8') mgrs:latitude_band
()
<U1
'T'
view:sun_azimuth
(time)
float64
147.6
s2:unclassified_percentage
(time)
float64
0.005876
updated
(time)
<U24
'2024-07-01T15:45:23.617Z'
array(['2024-07-01T15:45:23.617Z'], dtype='<U24') s2:medium_proba_clouds_percentage
(time)
float64
26.11
earthsearch:s3_path
(time)
<U79
's3://sentinel-cogs/sentinel-s2-...
array(['s3://sentinel-cogs/sentinel-s2-l2a-cogs/32/T/PT/2024/7/S2B_32TPT_20240701_0_L2A'],
dtype='<U79') s2:product_type
()
<U7
'S2MSI2A'
array('S2MSI2A', dtype='<U7') s2:datastrip_id
()
<U64
'S2B_OPER_MSI_L2A_DS_2BPS_202407...
array('S2B_OPER_MSI_L2A_DS_2BPS_20240701T125445_S20240701T101053_N05.10',
dtype='<U64') s2:degraded_msi_data_percentage
(time)
float64
0.0978
s2:processing_baseline
()
<U5
'05.10'
array('05.10', dtype='<U5') processing:software
()
object
{'sentinel2-to-stac': '0.1.1'}
array({'sentinel2-to-stac': '0.1.1'}, dtype=object) grid:code
(time)
<U10
'MGRS-32TPT'
array(['MGRS-32TPT'], dtype='<U10') mgrs:utm_zone
()
int64
32
s2:dark_features_percentage
(time)
float64
0.000479
s2:generation_time
()
<U27
'2024-07-01T12:54:45.000000Z'
array('2024-07-01T12:54:45.000000Z', dtype='<U27') s2:thin_cirrus_percentage
(time)
float64
2.272
proj:code
()
<U10
'EPSG:32632'
array('EPSG:32632', dtype='<U10') s2:high_proba_clouds_percentage
(time)
float64
69.49
created
(time)
<U24
'2024-07-01T15:45:23.617Z'
array(['2024-07-01T15:45:23.617Z'], dtype='<U24') constellation
()
<U10
'sentinel-2'
array('sentinel-2', dtype='<U10') s2:datatake_id
()
<U34
'GS2B_20240701T100559_038229_N05...
array('GS2B_20240701T100559_038229_N05.10', dtype='<U34') s2:snow_ice_percentage
(time)
float64
0.01595
mgrs:grid_square
(time)
<U2
'PT'
array(['PT'], dtype='<U2') platform
()
<U11
'sentinel-2b'
array('sentinel-2b', dtype='<U11') s2:granule_id
(time)
<U62
'S2B_OPER_MSI_L2A_TL_2BPS_202407...
array(['S2B_OPER_MSI_L2A_TL_2BPS_20240701T125445_A038229_T32TPT_N05.10'],
dtype='<U62') earthsearch:payload_id
(time)
<U74
'roda-sentinel2/workflow-sentine...
array(['roda-sentinel2/workflow-sentinel2-to-stac/cd2c9a0c8c33cff594b43f495a19f35e'],
dtype='<U74') s2:sequence
()
<U1
'0'
s2:nodata_pixel_percentage
(time)
object
8.589324
array([8.589324], dtype=object) s2:vegetation_percentage
(time)
float64
0.08036
s2:saturated_defective_pixel_percentage
()
int64
0
s2:product_uri
(time)
<U65
'S2B_MSIL2A_20240701T100559_N051...
array(['S2B_MSIL2A_20240701T100559_N0510_R022_T32TPT_20240701T125445.SAFE'],
dtype='<U65') eo:cloud_cover
(time)
float64
97.87
s2:cloud_shadow_percentage
(time)
float64
2.004
s2:not_vegetated_percentage
(time)
float64
0.01881
earthsearch:boa_offset_applied
()
bool
True
view:sun_elevation
(time)
float64
62.73
s2:water_percentage
(time)
float64
0.004639
s2:reflectance_conversion_factor
()
float64
0.9676
gsd
(band)
int64
10 10 10 20
raster:bands
(band)
object
{'nodata': 0, 'data_type': 'uint...
array([{'nodata': 0, 'data_type': 'uint16', 'bits_per_sample': 15, 'spatial_resolution': 10, 'scale': 0.0001, 'offset': -0.1},
{'nodata': 0, 'data_type': 'uint16', 'bits_per_sample': 15, 'spatial_resolution': 10, 'scale': 0.0001, 'offset': -0.1},
{'nodata': 0, 'data_type': 'uint16', 'bits_per_sample': 15, 'spatial_resolution': 10, 'scale': 0.0001, 'offset': -0.1},
{'nodata': 0, 'data_type': 'uint16', 'bits_per_sample': 15, 'spatial_resolution': 20, 'scale': 0.0001, 'offset': -0.1}],
dtype=object) title
(band)
<U21
'Red (band 4) - 10m' ... 'NIR 2 ...
array(['Red (band 4) - 10m', 'Green (band 3) - 10m',
'Blue (band 2) - 10m', 'NIR 2 (band 8A) - 20m'], dtype='<U21') common_name
(band)
<U5
'red' 'green' 'blue' 'nir08'
array(['red', 'green', 'blue', 'nir08'], dtype='<U5') center_wavelength
(band)
float64
0.665 0.56 0.49 0.865
array([0.665, 0.56 , 0.49 , 0.865]) full_width_half_max
(band)
float64
0.038 0.045 0.098 0.033
array([0.038, 0.045, 0.098, 0.033]) epsg
()
int64
32632
Indexes: (4)
PandasIndex
PandasIndex(DatetimeIndex(['2024-07-01 10:17:48.675000'], dtype='datetime64[ns]', name='time', freq=None)) PandasIndex
PandasIndex(Index(['red', 'green', 'blue', 'nir08'], dtype='object', name='band')) PandasIndex
PandasIndex(Index([621715.0, 621725.0, 621735.0, 621745.0, 621755.0, 621765.0, 621775.0,
621785.0, 621795.0, 621805.0,
...
674905.0, 674915.0, 674925.0, 674935.0, 674945.0, 674955.0, 674965.0,
674975.0, 674985.0, 674995.0],
dtype='float64', name='x', length=5329)) PandasIndex
PandasIndex(Index([5237275.0, 5237265.0, 5237255.0, 5237245.0, 5237235.0, 5237225.0,
5237215.0, 5237205.0, 5237195.0, 5237185.0,
...
5170625.0, 5170615.0, 5170605.0, 5170595.0, 5170585.0, 5170575.0,
5170565.0, 5170555.0, 5170545.0, 5170535.0],
dtype='float64', name='y', length=6675)) Attributes: (4)
spec : RasterSpec(epsg=32632, bounds=(598150, 5087460, 713750, 5302570), resolutions_xy=(10, 10)) crs : epsg:32632 transform : | 10.00, 0.00, 598150.00|
| 0.00,-10.00, 5302570.00|
| 0.00, 0.00, 1.00| resolution : 10
Remove or convert coordinates and attributes with dtype object
These can’t be serialized by zarr
ds["s2:nodata_pixel_percentage" ] = ds["s2:nodata_pixel_percentage" ].astype(np.float32)
for coord in ds.coords:
if type (ds[coord].dtype) == np.dtypes.ObjectDType:
print (coord)
processing:software
raster:bands
ds = ds.drop_vars(["raster:bands" , "processing:software" ])
Unify chunks
ds = ds.chunk(chunks= {"time" : 1 , "x" : 1000 , "y" : 1000 })