stackstac.stack

Query Sentinel-2 data via stackstac into xarray.DataArray

Data will be written to a temporary directory to illustrate how to remove attributes, which is necessary if you want to write the object as a zarr file to disk.

from pathlib import Path
import sys
import tempfile

import numpy as np
import pystac_client
import stackstac

from bounding_box import *
print(sys.version)
print(pystac_client.__version__)
print(stackstac.__version__)
3.13.1 | packaged by conda-forge | (main, Dec  5 2024, 21:23:54) [GCC 13.3.0]
0.8.6
0.5.1
tmp_dir = tempfile.TemporaryDirectory()
out_dir = Path(tmp_dir.name)
catalog = pystac_client.Client.open("https://earth-search.aws.element84.com/v1/")

query = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=[lon_min, lat_min, lon_max, lat_max],
    datetime="2024-07-01",
)

items = list(query.items())
print(f"Found: {len(items):d} datasets")
Found: 2 datasets
ds = stackstac.stack(
    items,
    assets=["red", "green", "blue", "nir08"],
    resolution=10,
    xy_coords="center",
    epsg=32632
)
ds = ds.sel(x=slice(x_min, x_max), y=slice(y_max, y_min))
ds = ds.isel(time=[0]) # just take first timestep to reduce amount of data
ds
<xarray.DataArray 'stackstac-7f303e927bf4afe6baade82cf0bd2b4e' (time: 1,
                                                                band: 4,
                                                                y: 6675, x: 5329)> Size: 1GB
dask.array<getitem, shape=(1, 4, 6675, 5329), dtype=float64, chunksize=(1, 1, 1024, 1024), chunktype=numpy.ndarray>
Coordinates: (12/52)
  * time                                     (time) datetime64[ns] 8B 2024-07...
    id                                       (time) <U24 96B 'S2B_32TPT_20240...
  * band                                     (band) <U5 80B 'red' ... 'nir08'
  * x                                        (x) float64 43kB 6.217e+05 ... 6...
  * y                                        (y) float64 53kB 5.237e+06 ... 5...
    instruments                              <U3 12B 'msi'
    ...                                       ...
    raster:bands                             (band) object 32B {'nodata': 0, ...
    title                                    (band) <U21 336B 'Red (band 4) -...
    common_name                              (band) <U5 80B 'red' ... 'nir08'
    center_wavelength                        (band) float64 32B 0.665 ... 0.865
    full_width_half_max                      (band) float64 32B 0.038 ... 0.033
    epsg                                     int64 8B 32632
Attributes:
    spec:        RasterSpec(epsg=32632, bounds=(598150, 5087460, 713750, 5302...
    crs:         epsg:32632
    transform:   | 10.00, 0.00, 598150.00|\n| 0.00,-10.00, 5302570.00|\n| 0.0...
    resolution:  10

Remove or convert coordinates and attributes with dtype object

These can’t be serialized by zarr

ds["s2:nodata_pixel_percentage"] = ds["s2:nodata_pixel_percentage"].astype(np.float32)
for coord in ds.coords:
    if type(ds[coord].dtype) == np.dtypes.ObjectDType:
        print(coord)
processing:software
raster:bands
ds = ds.drop_vars(["raster:bands", "processing:software"])

Unify chunks

ds = ds.chunk(chunks={"time": 1, "x": 1000, "y": 1000})

Remove attributes - be careful, geotransform probably lost

ds.attrs
{'spec': RasterSpec(epsg=32632, bounds=(598150, 5087460, 713750, 5302570), resolutions_xy=(10, 10)),
 'crs': 'epsg:32632',
 'transform': Affine(10.0, 0.0, 598150.0,
        0.0, -10.0, 5302570.0),
 'resolution': 10}
ds.attrs = {}
ds.to_zarr(out_dir / f"s2_{ds.time.dt.strftime('%Y-%m-%d').values[0]}.zarr")

Clean up

tmp_dir.cleanup()