Temporal resampling/aggregation

import xarray as xr
import numpy as np
import pandas as pd

from util import generate_3d_dataset

Generate some random data

# Sizes passed to the dataset generator: 40 x 60 x 120
# (shown as the lat, lon and time dimensions in the output below).
lat = 40
lon = 60
time = 120
ds = generate_3d_dataset(lat, lon, time)
ds  # last expression of the cell: displays the dataset summary
<xarray.Dataset> Size: 2MB
Dimensions:  (lat: 40, lon: 60, time: 120)
Coordinates:
  * lat      (lat) int64 320B 0 1 2 3 4 5 6 7 8 9 ... 31 32 33 34 35 36 37 38 39
  * lon      (lon) int64 480B 0 1 2 3 4 5 6 7 8 9 ... 51 52 53 54 55 56 57 58 59
  * time     (time) datetime64[ns] 960B 2021-01-01 2021-01-02 ... 2021-04-30
Data variables:
    test     (lat, lon, time) float64 2MB dask.array<chunksize=(4, 6, 120), meta=np.ndarray>

Resample to a monthly interval

# Reference result: built-in mean over each "1MS" (month-start) resampling bin.
da_monthly_mean = ds["test"].resample(time="1MS").mean()
def custom_agg_func(da: xr.DataArray) -> xr.DataArray:
    """Reduce ``da`` along ``time`` as its sum divided by its count.

    This is only a placeholder: any reduction over the ``time`` dimension
    could be substituted here.
    """
    total = da.sum(dim="time")
    n_counted = da.count(dim="time")
    return total / n_counted
# Run the custom aggregation on every monthly group. `map` is the current name
# of this operation; `apply` is only kept as a backward-compatible alias.
da_monthly_mean_custom = ds.test.resample(time="1MS").map(custom_agg_func)
# Both results are floating point and may be computed along different code
# paths, so compare them with a tolerance instead of exact equality.
xr.testing.assert_allclose(da_monthly_mean_custom, da_monthly_mean)